/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"
#include "i965_yuv_coefs.h"

#include "i965_render.h"

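/*
 * The kernels below are pre-assembled GPU shader binaries built from the
 * sources in shaders/render/.  Each array row is one 128-bit EU instruction
 * stored as four 32-bit words, and a separate binary is included for every
 * supported hardware generation.
 */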
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

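/* Round nreg up to whole 16-register GRF blocks, minus one: the unit-state
 * grf_reg_count fields use this blocks-minus-one encoding. */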
#define I965_GRF_BLOCKS(nreg)   (((nreg) + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* Programs for Ironlake */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* Programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

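/*
 * Surface state layout: one padded SURFACE_STATE block per render surface
 * in a single buffer object, followed by the binding table whose entries
 * point back at those blocks.
 */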
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

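/*
 * Reinterpret the bits of a float as a uint32_t.  Type punning through a
 * union is the portable way to do this in C; a plain pointer cast would
 * break strict-aliasing rules.
 */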
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

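/*
 * Static URB partitioning.  GS and CLIP get no entries because both stages
 * are disabled; VS, SF and CS (the CURBE constant buffer) get the small
 * allocations this fixed pipeline needs.
 */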
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

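/*
 * The VS is programmed as a pass-through: vs_enable is off, so vertices
 * reach the SF stage untouched and only the URB allocation fields matter.
 */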
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_info))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

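/*
 * SF unit state: points at the SF kernel and gives it one URB entry per
 * vertex.  Viewport transform, culling and scissoring are all disabled
 * because the vertex data is already in screen coordinates.
 */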
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

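/*
 * One sampler per bound source plane, all configured identically:
 * bilinear min/mag filtering with clamp-to-edge on every coordinate.
 */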
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}
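
/*
 * WM (pixel shader) unit state for subpicture blending: identical in
 * shape to i965_render_wm_unit() below, but it dispatches the ARGB
 * subpicture kernel instead of the planar video kernel.
 */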
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* in 1KB units */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

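/*
 * WM unit state for the main video path: dispatches the planar-YUV pixel
 * shader in 16-pixel mode.  The wm4.sampler_count field is encoded in
 * groups of four samplers.
 */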
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* in 1KB units */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

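/*
 * Depth testing is disabled, so the CC viewport only needs a valid depth
 * range; it is simply set wide open.
 */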
static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

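/*
 * CC unit for subpicture rendering: color blend is enabled with
 * src_alpha/inv_src_alpha factors, i.e. standard "over" compositing of
 * the subpicture onto the video frame.
 */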
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

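/*
 * CC unit for the main video path: blending is disabled and the logic op
 * is COPY, so the shader output is written straight to the render target.
 */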
static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

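/*
 * Fill a pre-Gen7 SURFACE_STATE.  For field rendering (VA_TOP_FIELD /
 * VA_BOTTOM_FIELD) the surface is sampled as a single field: the vertical
 * line stride skips every other line and the height is halved.
 */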
static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

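/*
 * Haswell added "Shader Channel Select" to SURFACE_STATE; leaving the
 * fields zeroed would select zero for every channel, so an identity RGBA
 * mapping is programmed explicitly.
 */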
/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

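/*
 * Write one source SURFACE_STATE and its binding table entry.  Each call
 * also bumps wm.sampler_count, so the number of samplers programmed later
 * matches the number of bound source surfaces.
 */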
static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

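/*
 * Bind the source planes, each at two consecutive binding table indices.
 * Y goes to slots 1 and 2; NV12 adds its interleaved UV plane at 3 and 4,
 * while fully planar formats bind U at 3 and 4 and V at 5 and 6.
 */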
static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

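/*
 * The render target is the drawable region: B5G6R5 for 16bpp drawables,
 * B8G8R8A8 otherwise.
 */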
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

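/*
 * Upload the three vertices of the quad; a RECTLIST primitive needs only
 * three corners and the hardware infers the fourth.  Each vertex is four
 * floats: texture coordinates first, then screen coordinates.  The
 * texture coordinates are permuted according to the rotation attribute.
 */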
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

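/*
 * CURBE layout consumed by the pixel shader: the first two 16-bit words
 * select the source layout (0 = planar, 1 = NV12 interleaved, 2 =
 * grayscale) and whether the color balance pass can be skipped; floats
 * 4-7 hold the color balance coefficients and floats 8 onward the
 * YUV-to-RGB conversion matrix.
 */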
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    const float *yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    yuv_to_rgb = (float *)constant_buffer + 8;
    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(flags & VA_SRC_COLOR_MASK),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

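/*
 * Build all of the indirect state needed for one surface blit: unit
 * states, surface and sampler state, vertices and constants.  The batch
 * commands that consume this state are emitted by the functions below.
 */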
static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

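/*
 * Batchbuffer emission starts here.  PIPELINE_SELECT must come before any
 * other 3D command in the batch.
 */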
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

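/*
 * STATE_BASE_ADDRESS grew from 6 to 8 dwords on Ironlake.  Only the
 * surface state base points at a real buffer; every other base address
 * stays zero.
 */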
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

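/*
 * Point the fixed-function stages at the unit state built earlier; the
 * GS and CLIP stages are disabled.
 */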
static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

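/*
 * URB_FENCE carves the URB into consecutive per-stage regions; each fence
 * value written here is the end offset of that stage's region.
 */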
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

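/*
 * CMD_CONSTANT_BUFFER points the CS at the CURBE buffer; the low bits of
 * the relocated address (URB_CS_ENTRY_SIZE - 1 here) carry the buffer
 * length, since the buffer itself is aligned well past them.
 */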
static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

  1374. static void
  1375. i965_render_vertex_elements(VADriverContextP ctx)
  1376. {
  1377.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1378.     struct intel_batchbuffer *batch = i965->batch;
  1379.  
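    /*
     * Each vertex is four floats: position (x, y) at offset 0 and
     * texture coordinates (s0, t0) at offset 8, matching the 16-byte
     * pitch programmed in CMD_VERTEX_BUFFERS.
     */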
  1380.     if (IS_IRONLAKE(i965->intel.device_info)) {
  1381.         BEGIN_BATCH(batch, 5);
  1382.         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
  1383.         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  1384.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1385.                   VE0_VALID |
  1386.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1387.                   (0 << VE0_OFFSET_SHIFT));
  1388.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1389.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1390.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1391.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  1392.         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  1393.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1394.                   VE0_VALID |
  1395.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1396.                   (8 << VE0_OFFSET_SHIFT));
  1397.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1398.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1399.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1400.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  1401.         ADVANCE_BATCH(batch);
  1402.     } else {
  1403.         BEGIN_BATCH(batch, 5);
  1404.         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
  1405.         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  1406.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1407.                   VE0_VALID |
  1408.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1409.                   (0 << VE0_OFFSET_SHIFT));
  1410.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1411.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1412.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1413.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
  1414.                   (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
  1415.         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  1416.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1417.                   VE0_VALID |
  1418.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1419.                   (8 << VE0_OFFSET_SHIFT));
  1420.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1421.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1422.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1423.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
  1424.                   (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
  1425.         ADVANCE_BATCH(batch);
  1426.     }
  1427. }
  1428.  
  1429. static void
  1430. i965_render_upload_image_palette(
  1431.     VADriverContextP ctx,
  1432.     struct object_image *obj_image,
  1433.     unsigned int     alpha
  1434. )
  1435. {
  1436.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1437.     struct intel_batchbuffer *batch = i965->batch;
  1438.     unsigned int i;
  1439.  
  1440.     assert(obj_image);
  1441.  
  1442.     if (!obj_image)
  1443.         return;
  1444.  
  1445.     if (obj_image->image.num_palette_entries == 0)
  1446.         return;
  1447.  
  1448.     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
  1449.     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
  1450.     /* fill palette: each entry is packed as AARRGGBB */
  1451.     /* (colour in bits 0-23, alpha in bits 24-31) */
  1452.     for (i = 0; i < obj_image->image.num_palette_entries; i++)
  1453.         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
  1454.     ADVANCE_BATCH(batch);
  1455. }
  1456.  
  1457. static void
  1458. i965_render_startup(VADriverContextP ctx)
  1459. {
  1460.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1461.     struct intel_batchbuffer *batch = i965->batch;
  1462.     struct i965_render_state *render_state = &i965->render_state;
  1463.  
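    /*
     * Emit the vertex buffer, then draw a single RECTLIST primitive.
     * A RECTLIST needs only three vertices; the hardware infers the
     * fourth corner of the rectangle.
     */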
  1464.     BEGIN_BATCH(batch, 11);
  1465.     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
  1466.     OUT_BATCH(batch,
  1467.               (0 << VB0_BUFFER_INDEX_SHIFT) |
  1468.               VB0_VERTEXDATA |
  1469.               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
  1470.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
  1471.  
  1472.     if (IS_IRONLAKE(i965->intel.device_info))
  1473.         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
  1474.     else
  1475.         OUT_BATCH(batch, 3);
  1476.  
  1477.     OUT_BATCH(batch, 0);
  1478.  
  1479.     OUT_BATCH(batch,
  1480.               CMD_3DPRIMITIVE |
  1481.               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
  1482.               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
  1483.               (0 << 9) |
  1484.               4);
  1485.     OUT_BATCH(batch, 3); /* vertex count per instance */
  1486.     OUT_BATCH(batch, 0); /* start vertex offset */
  1487.     OUT_BATCH(batch, 1); /* single instance */
  1488.     OUT_BATCH(batch, 0); /* start instance location */
  1489.     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
  1490.     ADVANCE_BATCH(batch);
  1491. }
  1492.  
  1493. static void
  1494. i965_clear_dest_region(VADriverContextP ctx)
  1495. {
  1496.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1497.     struct intel_batchbuffer *batch = i965->batch;
  1498.     struct i965_render_state *render_state = &i965->render_state;
  1499.     struct intel_region *dest_region = render_state->draw_region;
  1500.     unsigned int blt_cmd, br13;
  1501.     int pitch;
  1502.  
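    /*
     * Solid-fill the destination with the blitter: 0xf0 in BR13 bits
     * 16-23 is the PATCOPY raster op, so the fill colour is taken from
     * the immediate dword at the end of the XY_COLOR_BLT packet.
     */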
  1503.     blt_cmd = XY_COLOR_BLT_CMD;
  1504.     br13 = 0xf0 << 16;
  1505.     pitch = dest_region->pitch;
  1506.  
  1507.     if (dest_region->cpp == 4) {
  1508.         br13 |= BR13_8888;
  1509.         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
  1510.     } else {
  1511.         assert(dest_region->cpp == 2);
  1512.         br13 |= BR13_565;
  1513.     }
  1514.  
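    /* The blitter expects the pitch of tiled surfaces in dwords, not bytes. */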
  1515.     if (dest_region->tiling != I915_TILING_NONE) {
  1516.         blt_cmd |= XY_COLOR_BLT_DST_TILED;
  1517.         pitch /= 4;
  1518.     }
  1519.  
  1520.     br13 |= pitch;
  1521.  
  1522.     if (IS_GEN6(i965->intel.device_info) ||
  1523.         IS_GEN7(i965->intel.device_info)) {
  1524.         intel_batchbuffer_start_atomic_blt(batch, 24);
  1525.         BEGIN_BLT_BATCH(batch, 6);
  1526.     } else {
  1527.         intel_batchbuffer_start_atomic(batch, 24);
  1528.         BEGIN_BATCH(batch, 6);
  1529.     }
  1530.  
  1531.     OUT_BATCH(batch, blt_cmd);
  1532.     OUT_BATCH(batch, br13);
  1533.     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
  1534.     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
  1535.               (dest_region->x + dest_region->width));
  1536.     OUT_RELOC(batch, dest_region->bo,
  1537.               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  1538.               0);
  1539.     OUT_BATCH(batch, 0x0);
  1540.     ADVANCE_BATCH(batch);
  1541.     intel_batchbuffer_end_atomic(batch);
  1542. }
  1543.  
  1544. static void
  1545. i965_surface_render_pipeline_setup(VADriverContextP ctx)
  1546. {
  1547.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1548.     struct intel_batchbuffer *batch = i965->batch;
  1549.  
  1550.     i965_clear_dest_region(ctx);
  1551.     intel_batchbuffer_start_atomic(batch, 0x1000);
  1552.     intel_batchbuffer_emit_mi_flush(batch);
  1553.     i965_render_pipeline_select(ctx);
  1554.     i965_render_state_sip(ctx);
  1555.     i965_render_state_base_address(ctx);
  1556.     i965_render_binding_table_pointers(ctx);
  1557.     i965_render_constant_color(ctx);
  1558.     i965_render_pipelined_pointers(ctx);
  1559.     i965_render_urb_layout(ctx);
  1560.     i965_render_cs_urb_layout(ctx);
  1561.     i965_render_constant_buffer(ctx);
  1562.     i965_render_drawing_rectangle(ctx);
  1563.     i965_render_vertex_elements(ctx);
  1564.     i965_render_startup(ctx);
  1565.     intel_batchbuffer_end_atomic(batch);
  1566. }
  1567.  
  1568. static void
  1569. i965_subpic_render_pipeline_setup(VADriverContextP ctx)
  1570. {
  1571.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1572.     struct intel_batchbuffer *batch = i965->batch;
  1573.  
  1574.     intel_batchbuffer_start_atomic(batch, 0x1000);
  1575.     intel_batchbuffer_emit_mi_flush(batch);
  1576.     i965_render_pipeline_select(ctx);
  1577.     i965_render_state_sip(ctx);
  1578.     i965_render_state_base_address(ctx);
  1579.     i965_render_binding_table_pointers(ctx);
  1580.     i965_render_constant_color(ctx);
  1581.     i965_render_pipelined_pointers(ctx);
  1582.     i965_render_urb_layout(ctx);
  1583.     i965_render_cs_urb_layout(ctx);
  1584.     i965_render_constant_buffer(ctx);
  1585.     i965_render_drawing_rectangle(ctx);
  1586.     i965_render_vertex_elements(ctx);
  1587.     i965_render_startup(ctx);
  1588.     intel_batchbuffer_end_atomic(batch);
  1589. }
  1590.  
  1591.  
  1592. static void
  1593. i965_render_initialize(VADriverContextP ctx)
  1594. {
  1595.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1596.     struct i965_render_state *render_state = &i965->render_state;
  1597.     dri_bo *bo;
  1598.  
  1599.     /* VERTEX BUFFER */
  1600.     dri_bo_unreference(render_state->vb.vertex_buffer);
  1601.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1602.                       "vertex buffer",
  1603.                       4096,
  1604.                       4096);
  1605.     assert(bo);
  1606.     render_state->vb.vertex_buffer = bo;
  1607.  
  1608.     /* VS */
  1609.     dri_bo_unreference(render_state->vs.state);
  1610.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1611.                       "vs state",
  1612.                       sizeof(struct i965_vs_unit_state),
  1613.                       64);
  1614.     assert(bo);
  1615.     render_state->vs.state = bo;
  1616.  
  1617.     /* GS */
  1618.     /* CLIP */
  1619.     /* SF */
  1620.     dri_bo_unreference(render_state->sf.state);
  1621.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1622.                       "sf state",
  1623.                       sizeof(struct i965_sf_unit_state),
  1624.                       64);
  1625.     assert(bo);
  1626.     render_state->sf.state = bo;
  1627.  
  1628.     /* WM */
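    /* A single bo holds the padded SURFACE_STATE entries followed by the
     * binding table (one dword index per surface). */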
  1629.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  1630.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1631.                       "surface state & binding table",
  1632.                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
  1633.                       4096);
  1634.     assert(bo);
  1635.     render_state->wm.surface_state_binding_table_bo = bo;
  1636.  
  1637.     dri_bo_unreference(render_state->wm.sampler);
  1638.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1639.                       "sampler state",
  1640.                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
  1641.                       64);
  1642.     assert(bo);
  1643.     render_state->wm.sampler = bo;
  1644.     render_state->wm.sampler_count = 0;
  1645.  
  1646.     dri_bo_unreference(render_state->wm.state);
  1647.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1648.                       "wm state",
  1649.                       sizeof(struct i965_wm_unit_state),
  1650.                       64);
  1651.     assert(bo);
  1652.     render_state->wm.state = bo;
  1653.  
  1654.     /* COLOR CALCULATOR */
  1655.     dri_bo_unreference(render_state->cc.state);
  1656.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1657.                       "color calc state",
  1658.                       sizeof(struct i965_cc_unit_state),
  1659.                       64);
  1660.     assert(bo);
  1661.     render_state->cc.state = bo;
  1662.  
  1663.     dri_bo_unreference(render_state->cc.viewport);
  1664.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1665.                       "cc viewport",
  1666.                       sizeof(struct i965_cc_viewport),
  1667.                       64);
  1668.     assert(bo);
  1669.     render_state->cc.viewport = bo;
  1670. }
  1671.  
  1672. static void
  1673. i965_render_put_surface(
  1674.     VADriverContextP   ctx,
  1675.     struct object_surface *obj_surface,
  1676.     const VARectangle *src_rect,
  1677.     const VARectangle *dst_rect,
  1678.     unsigned int       flags
  1679. )
  1680. {
  1681.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1682.     struct intel_batchbuffer *batch = i965->batch;
  1683.  
  1684.     i965_render_initialize(ctx);
  1685.     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
  1686.     i965_surface_render_pipeline_setup(ctx);
  1687.     intel_batchbuffer_flush(batch);
  1688. }
  1689.  
  1690. static void
  1691. i965_render_put_subpicture(
  1692.     VADriverContextP   ctx,
  1693.     struct object_surface *obj_surface,
  1694.     const VARectangle *src_rect,
  1695.     const VARectangle *dst_rect
  1696. )
  1697. {
  1698.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1699.     struct intel_batchbuffer *batch = i965->batch;
  1700.     unsigned int index = obj_surface->subpic_render_idx;
  1701.     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
  1702.  
  1703.     assert(obj_subpic);
  1704.  
  1705.     i965_render_initialize(ctx);
  1706.     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
  1707.     i965_subpic_render_pipeline_setup(ctx);
  1708.     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
  1709.     intel_batchbuffer_flush(batch);
  1710. }
  1711.  
  1712. /*
  1713.  * for GEN6+
  1714.  */
  1715. static void
  1716. gen6_render_initialize(VADriverContextP ctx)
  1717. {
  1718.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1719.     struct i965_render_state *render_state = &i965->render_state;
  1720.     dri_bo *bo;
  1721.  
  1722.     /* VERTEX BUFFER */
  1723.     dri_bo_unreference(render_state->vb.vertex_buffer);
  1724.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1725.                       "vertex buffer",
  1726.                       4096,
  1727.                       4096);
  1728.     assert(bo);
  1729.     render_state->vb.vertex_buffer = bo;
  1730.  
  1731.     /* WM */
  1732.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  1733.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1734.                       "surface state & binding table",
  1735.                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
  1736.                       4096);
  1737.     assert(bo);
  1738.     render_state->wm.surface_state_binding_table_bo = bo;
  1739.  
  1740.     dri_bo_unreference(render_state->wm.sampler);
  1741.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1742.                       "sampler state",
  1743.                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
  1744.                       4096);
  1745.     assert(bo);
  1746.     render_state->wm.sampler = bo;
  1747.     render_state->wm.sampler_count = 0;
  1748.  
  1749.     /* COLOR CALCULATOR */
  1750.     dri_bo_unreference(render_state->cc.state);
  1751.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1752.                       "color calc state",
  1753.                       sizeof(struct gen6_color_calc_state),
  1754.                       4096);
  1755.     assert(bo);
  1756.     render_state->cc.state = bo;
  1757.  
  1758.     /* CC VIEWPORT */
  1759.     dri_bo_unreference(render_state->cc.viewport);
  1760.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1761.                       "cc viewport",
  1762.                       sizeof(struct i965_cc_viewport),
  1763.                       4096);
  1764.     assert(bo);
  1765.     render_state->cc.viewport = bo;
  1766.  
  1767.     /* BLEND STATE */
  1768.     dri_bo_unreference(render_state->cc.blend);
  1769.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1770.                       "blend state",
  1771.                       sizeof(struct gen6_blend_state),
  1772.                       4096);
  1773.     assert(bo);
  1774.     render_state->cc.blend = bo;
  1775.  
  1776.     /* DEPTH & STENCIL STATE */
  1777.     dri_bo_unreference(render_state->cc.depth_stencil);
  1778.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1779.                       "depth & stencil state",
  1780.                       sizeof(struct gen6_depth_stencil_state),
  1781.                       4096);
  1782.     assert(bo);
  1783.     render_state->cc.depth_stencil = bo;
  1784. }
  1785.  
  1786. static void
  1787. gen6_render_color_calc_state(VADriverContextP ctx)
  1788. {
  1789.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1790.     struct i965_render_state *render_state = &i965->render_state;
  1791.     struct gen6_color_calc_state *color_calc_state;
  1792.    
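    /* The constant colour (opaque magenta) is presumably a debug aid;
     * the blend state programmed alongside it does not reference it. */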
  1793.     dri_bo_map(render_state->cc.state, 1);
  1794.     assert(render_state->cc.state->virtual);
  1795.     color_calc_state = render_state->cc.state->virtual;
  1796.     memset(color_calc_state, 0, sizeof(*color_calc_state));
  1797.     color_calc_state->constant_r = 1.0;
  1798.     color_calc_state->constant_g = 0.0;
  1799.     color_calc_state->constant_b = 1.0;
  1800.     color_calc_state->constant_a = 1.0;
  1801.     dri_bo_unmap(render_state->cc.state);
  1802. }
  1803.  
  1804. static void
  1805. gen6_render_blend_state(VADriverContextP ctx)
  1806. {
  1807.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1808.     struct i965_render_state *render_state = &i965->render_state;
  1809.     struct gen6_blend_state *blend_state;
  1810.    
  1811.     dri_bo_map(render_state->cc.blend, 1);
  1812.     assert(render_state->cc.blend->virtual);
  1813.     blend_state = render_state->cc.blend->virtual;
  1814.     memset(blend_state, 0, sizeof(*blend_state));
  1815.     blend_state->blend1.logic_op_enable = 1;
  1816.     blend_state->blend1.logic_op_func = 0xc;
  1817.     dri_bo_unmap(render_state->cc.blend);
  1818. }
  1819.  
  1820. static void
  1821. gen6_render_depth_stencil_state(VADriverContextP ctx)
  1822. {
  1823.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1824.     struct i965_render_state *render_state = &i965->render_state;
  1825.     struct gen6_depth_stencil_state *depth_stencil_state;
  1826.    
  1827.     dri_bo_map(render_state->cc.depth_stencil, 1);
  1828.     assert(render_state->cc.depth_stencil->virtual);
  1829.     depth_stencil_state = render_state->cc.depth_stencil->virtual;
  1830.     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
  1831.     dri_bo_unmap(render_state->cc.depth_stencil);
  1832. }
  1833.  
  1834. static void
  1835. gen6_render_setup_states(
  1836.     VADriverContextP   ctx,
  1837.     struct object_surface *obj_surface,
  1838.     const VARectangle *src_rect,
  1839.     const VARectangle *dst_rect,
  1840.     unsigned int       flags
  1841. )
  1842. {
  1843.     i965_render_dest_surface_state(ctx, 0);
  1844.     i965_render_src_surfaces_state(ctx, obj_surface, flags);
  1845.     i965_render_sampler(ctx);
  1846.     i965_render_cc_viewport(ctx);
  1847.     gen6_render_color_calc_state(ctx);
  1848.     gen6_render_blend_state(ctx);
  1849.     gen6_render_depth_stencil_state(ctx);
  1850.     i965_render_upload_constants(ctx, obj_surface, flags);
  1851.     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
  1852. }
  1853.  
  1854. static void
  1855. gen6_emit_invarient_states(VADriverContextP ctx)
  1856. {
  1857.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1858.     struct intel_batchbuffer *batch = i965->batch;
  1859.  
  1860.     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
  1861.  
  1862.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
  1863.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
  1864.               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
  1865.     OUT_BATCH(batch, 0);
  1866.  
  1867.     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
  1868.     OUT_BATCH(batch, 1);
  1869.  
  1870.     /* Set system instruction pointer */
  1871.     OUT_BATCH(batch, CMD_STATE_SIP | 0);
  1872.     OUT_BATCH(batch, 0);
  1873. }
  1874.  
  1875. static void
  1876. gen6_emit_state_base_address(VADriverContextP ctx)
  1877. {
  1878.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1879.     struct intel_batchbuffer *batch = i965->batch;
  1880.     struct i965_render_state *render_state = &i965->render_state;
  1881.  
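    /* Only the surface-state base points at a real bo; every other base
     * stays zero, with BASE_ADDRESS_MODIFY set so the fields take effect. */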
  1882.     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
  1883.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
  1884.     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
  1885.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
  1886.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
  1887.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
  1888.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
  1889.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
  1890.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
  1891.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
  1892. }
  1893.  
  1894. static void
  1895. gen6_emit_viewport_state_pointers(VADriverContextP ctx)
  1896. {
  1897.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1898.     struct intel_batchbuffer *batch = i965->batch;
  1899.     struct i965_render_state *render_state = &i965->render_state;
  1900.  
  1901.     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
  1902.               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
  1903.               (4 - 2));
  1904.     OUT_BATCH(batch, 0);
  1905.     OUT_BATCH(batch, 0);
  1906.     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1907. }
  1908.  
  1909. static void
  1910. gen6_emit_urb(VADriverContextP ctx)
  1911. {
  1912.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1913.     struct intel_batchbuffer *batch = i965->batch;
  1914.  
  1915.     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
  1916.     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
  1917.               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
  1918.     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
  1919.               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
  1920. }
  1921.  
  1922. static void
  1923. gen6_emit_cc_state_pointers(VADriverContextP ctx)
  1924. {
  1925.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1926.     struct intel_batchbuffer *batch = i965->batch;
  1927.     struct i965_render_state *render_state = &i965->render_state;
  1928.  
  1929.     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
  1930.     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
  1931.     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
  1932.     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
  1933. }
  1934.  
  1935. static void
  1936. gen6_emit_sampler_state_pointers(VADriverContextP ctx)
  1937. {
  1938.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1939.     struct intel_batchbuffer *batch = i965->batch;
  1940.     struct i965_render_state *render_state = &i965->render_state;
  1941.  
  1942.     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
  1943.               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
  1944.               (4 - 2));
  1945.     OUT_BATCH(batch, 0); /* VS */
  1946.     OUT_BATCH(batch, 0); /* GS */
  1947.     OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1948. }
  1949.  
  1950. static void
  1951. gen6_emit_binding_table(VADriverContextP ctx)
  1952. {
  1953.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1954.     struct intel_batchbuffer *batch = i965->batch;
  1955.  
  1956.     /* Binding table pointers */
  1957.     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
  1958.               GEN6_BINDING_TABLE_MODIFY_PS |
  1959.               (4 - 2));
  1960.     OUT_BATCH(batch, 0);                /* vs */
  1961.     OUT_BATCH(batch, 0);                /* gs */
  1962.     /* Only the PS uses the binding table */
  1963.     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
  1964. }
  1965.  
  1966. static void
  1967. gen6_emit_depth_buffer_state(VADriverContextP ctx)
  1968. {
  1969.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1970.     struct intel_batchbuffer *batch = i965->batch;
  1971.  
  1972.     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
  1973.     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
  1974.               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
  1975.     OUT_BATCH(batch, 0);
  1976.     OUT_BATCH(batch, 0);
  1977.     OUT_BATCH(batch, 0);
  1978.     OUT_BATCH(batch, 0);
  1979.     OUT_BATCH(batch, 0);
  1980.  
  1981.     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
  1982.     OUT_BATCH(batch, 0);
  1983. }
  1984.  
  1985. static void
  1986. gen6_emit_drawing_rectangle(VADriverContextP ctx)
  1987. {
  1988.     i965_render_drawing_rectangle(ctx);
  1989. }
  1990.  
  1991. static void
  1992. gen6_emit_vs_state(VADriverContextP ctx)
  1993. {
  1994.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1995.     struct intel_batchbuffer *batch = i965->batch;
  1996.  
  1997.     /* disable VS constant buffer */
  1998.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
  1999.     OUT_BATCH(batch, 0);
  2000.     OUT_BATCH(batch, 0);
  2001.     OUT_BATCH(batch, 0);
  2002.     OUT_BATCH(batch, 0);
  2003.        
  2004.     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
  2005.     OUT_BATCH(batch, 0); /* without VS kernel */
  2006.     OUT_BATCH(batch, 0);
  2007.     OUT_BATCH(batch, 0);
  2008.     OUT_BATCH(batch, 0);
  2009.     OUT_BATCH(batch, 0); /* pass-through */
  2010. }
  2011.  
  2012. static void
  2013. gen6_emit_gs_state(VADriverContextP ctx)
  2014. {
  2015.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2016.     struct intel_batchbuffer *batch = i965->batch;
  2017.  
  2018.     /* disable GS constant buffer */
  2019.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
  2020.     OUT_BATCH(batch, 0);
  2021.     OUT_BATCH(batch, 0);
  2022.     OUT_BATCH(batch, 0);
  2023.     OUT_BATCH(batch, 0);
  2024.        
  2025.     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
  2026.     OUT_BATCH(batch, 0); /* without GS kernel */
  2027.     OUT_BATCH(batch, 0);
  2028.     OUT_BATCH(batch, 0);
  2029.     OUT_BATCH(batch, 0);
  2030.     OUT_BATCH(batch, 0);
  2031.     OUT_BATCH(batch, 0); /* pass-through */
  2032. }
  2033.  
  2034. static void
  2035. gen6_emit_clip_state(VADriverContextP ctx)
  2036. {
  2037.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2038.     struct intel_batchbuffer *batch = i965->batch;
  2039.  
  2040.     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
  2041.     OUT_BATCH(batch, 0);
  2042.     OUT_BATCH(batch, 0); /* pass-through */
  2043.     OUT_BATCH(batch, 0);
  2044. }
  2045.  
  2046. static void
  2047. gen6_emit_sf_state(VADriverContextP ctx)
  2048. {
  2049.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2050.     struct intel_batchbuffer *batch = i965->batch;
  2051.  
  2052.     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
  2053.     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
  2054.               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
  2055.               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
  2056.     OUT_BATCH(batch, 0);
  2057.     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
  2058.     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
  2059.     OUT_BATCH(batch, 0);
  2060.     OUT_BATCH(batch, 0);
  2061.     OUT_BATCH(batch, 0);
  2062.     OUT_BATCH(batch, 0);
  2063.     OUT_BATCH(batch, 0); /* DW9 */
  2064.     OUT_BATCH(batch, 0);
  2065.     OUT_BATCH(batch, 0);
  2066.     OUT_BATCH(batch, 0);
  2067.     OUT_BATCH(batch, 0);
  2068.     OUT_BATCH(batch, 0); /* DW14 */
  2069.     OUT_BATCH(batch, 0);
  2070.     OUT_BATCH(batch, 0);
  2071.     OUT_BATCH(batch, 0);
  2072.     OUT_BATCH(batch, 0);
  2073.     OUT_BATCH(batch, 0); /* DW19 */
  2074. }
  2075.  
  2076. static void
  2077. gen6_emit_wm_state(VADriverContextP ctx, int kernel)
  2078. {
  2079.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2080.     struct intel_batchbuffer *batch = i965->batch;
  2081.     struct i965_render_state *render_state = &i965->render_state;
  2082.  
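    /*
     * Bind the CURBE bo as PS constant buffer 0, then point 3DSTATE_WM
     * at the selected pixel-shader kernel with SIMD16 dispatch enabled.
     */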
  2083.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
  2084.               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
  2085.               (5 - 2));
  2086.     OUT_RELOC(batch,
  2087.               render_state->curbe.bo,
  2088.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2089.               (URB_CS_ENTRY_SIZE-1));
  2090.     OUT_BATCH(batch, 0);
  2091.     OUT_BATCH(batch, 0);
  2092.     OUT_BATCH(batch, 0);
  2093.  
  2094.     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
  2095.     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
  2096.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2097.               0);
  2098.     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
  2099.               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
  2100.     OUT_BATCH(batch, 0);
  2101.     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
  2102.     OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
  2103.               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
  2104.               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
  2105.     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
  2106.               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
  2107.     OUT_BATCH(batch, 0);
  2108.     OUT_BATCH(batch, 0);
  2109. }
  2110.  
  2111. static void
  2112. gen6_emit_vertex_element_state(VADriverContextP ctx)
  2113. {
  2114.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2115.     struct intel_batchbuffer *batch = i965->batch;
  2116.  
  2117.     /* Set up our vertex elements, sourced from the single vertex buffer. */
  2118.     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
  2119.     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  2120.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2121.               GEN6_VE0_VALID |
  2122.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2123.               (0 << VE0_OFFSET_SHIFT));
  2124.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2125.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2126.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2127.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2128.     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  2129.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2130.               GEN6_VE0_VALID |
  2131.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2132.               (8 << VE0_OFFSET_SHIFT));
  2133.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2134.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2135.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2136.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2137. }
  2138.  
  2139. static void
  2140. gen6_emit_vertices(VADriverContextP ctx)
  2141. {
  2142.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2143.     struct intel_batchbuffer *batch = i965->batch;
  2144.     struct i965_render_state *render_state = &i965->render_state;
  2145.  
  2146.     BEGIN_BATCH(batch, 11);
  2147.     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
  2148.     OUT_BATCH(batch,
  2149.               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
  2150.               GEN6_VB0_VERTEXDATA |
  2151.               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
  2152.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
  2153.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
  2154.     OUT_BATCH(batch, 0);
  2155.  
  2156.     OUT_BATCH(batch,
  2157.               CMD_3DPRIMITIVE |
  2158.               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
  2159.               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
  2160.               (0 << 9) |
  2161.               4);
  2162.     OUT_BATCH(batch, 3); /* vertex count per instance */
  2163.     OUT_BATCH(batch, 0); /* start vertex offset */
  2164.     OUT_BATCH(batch, 1); /* single instance */
  2165.     OUT_BATCH(batch, 0); /* start instance location */
  2166.     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
  2167.     ADVANCE_BATCH(batch);
  2168. }
  2169.  
  2170. static void
  2171. gen6_render_emit_states(VADriverContextP ctx, int kernel)
  2172. {
  2173.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2174.     struct intel_batchbuffer *batch = i965->batch;
  2175.  
  2176.     intel_batchbuffer_start_atomic(batch, 0x1000);
  2177.     intel_batchbuffer_emit_mi_flush(batch);
  2178.     gen6_emit_invarient_states(ctx);
  2179.     gen6_emit_state_base_address(ctx);
  2180.     gen6_emit_viewport_state_pointers(ctx);
  2181.     gen6_emit_urb(ctx);
  2182.     gen6_emit_cc_state_pointers(ctx);
  2183.     gen6_emit_sampler_state_pointers(ctx);
  2184.     gen6_emit_vs_state(ctx);
  2185.     gen6_emit_gs_state(ctx);
  2186.     gen6_emit_clip_state(ctx);
  2187.     gen6_emit_sf_state(ctx);
  2188.     gen6_emit_wm_state(ctx, kernel);
  2189.     gen6_emit_binding_table(ctx);
  2190.     gen6_emit_depth_buffer_state(ctx);
  2191.     gen6_emit_drawing_rectangle(ctx);
  2192.     gen6_emit_vertex_element_state(ctx);
  2193.     gen6_emit_vertices(ctx);
  2194.     intel_batchbuffer_end_atomic(batch);
  2195. }
  2196.  
  2197. static void
  2198. gen6_render_put_surface(
  2199.     VADriverContextP   ctx,
  2200.     struct object_surface *obj_surface,
  2201.     const VARectangle *src_rect,
  2202.     const VARectangle *dst_rect,
  2203.     unsigned int       flags
  2204. )
  2205. {
  2206.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2207.     struct intel_batchbuffer *batch = i965->batch;
  2208.  
  2209.     gen6_render_initialize(ctx);
  2210.     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
  2211.     i965_clear_dest_region(ctx);
  2212.     gen6_render_emit_states(ctx, PS_KERNEL);
  2213.     intel_batchbuffer_flush(batch);
  2214. }
  2215.  
  2216. static void
  2217. gen6_subpicture_render_blend_state(VADriverContextP ctx)
  2218. {
  2219.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2220.     struct i965_render_state *render_state = &i965->render_state;
  2221.     struct gen6_blend_state *blend_state;
  2222.  
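    /*
     * Composite the subpicture over the frame with non-premultiplied
     * alpha:  dst = src.alpha * src + (1 - src.alpha) * dst
     */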
  2223.     dri_bo_unmap(render_state->cc.state);    
  2224.     dri_bo_map(render_state->cc.blend, 1);
  2225.     assert(render_state->cc.blend->virtual);
  2226.     blend_state = render_state->cc.blend->virtual;
  2227.     memset(blend_state, 0, sizeof(*blend_state));
  2228.     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
  2229.     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
  2230.     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
  2231.     blend_state->blend0.blend_enable = 1;
  2232.     blend_state->blend1.post_blend_clamp_enable = 1;
  2233.     blend_state->blend1.pre_blend_clamp_enable = 1;
  2234.     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
  2235.     dri_bo_unmap(render_state->cc.blend);
  2236. }
  2237.  
  2238. static void
  2239. gen6_subpicture_render_setup_states(
  2240.     VADriverContextP   ctx,
  2241.     struct object_surface *obj_surface,
  2242.     const VARectangle *src_rect,
  2243.     const VARectangle *dst_rect
  2244. )
  2245. {
  2246.     i965_render_dest_surface_state(ctx, 0);
  2247.     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
  2248.     i965_render_sampler(ctx);
  2249.     i965_render_cc_viewport(ctx);
  2250.     gen6_render_color_calc_state(ctx);
  2251.     gen6_subpicture_render_blend_state(ctx);
  2252.     gen6_render_depth_stencil_state(ctx);
  2253.     i965_subpic_render_upload_constants(ctx, obj_surface);
  2254.     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
  2255. }
  2256.  
  2257. static void
  2258. gen6_render_put_subpicture(
  2259.     VADriverContextP   ctx,
  2260.     struct object_surface *obj_surface,
  2261.     const VARectangle *src_rect,
  2262.     const VARectangle *dst_rect
  2263. )
  2264. {
  2265.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2266.     struct intel_batchbuffer *batch = i965->batch;
  2267.     unsigned int index = obj_surface->subpic_render_idx;
  2268.     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
  2269.  
  2270.     assert(obj_subpic);
  2271.     gen6_render_initialize(ctx);
  2272.     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
  2273.     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
  2274.     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
  2275.     intel_batchbuffer_flush(batch);
  2276. }
  2277.  
  2278. /*
  2279.  * for GEN7
  2280.  */
  2281. static void
  2282. gen7_render_initialize(VADriverContextP ctx)
  2283. {
  2284.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2285.     struct i965_render_state *render_state = &i965->render_state;
  2286.     dri_bo *bo;
  2287.  
  2288.     /* VERTEX BUFFER */
  2289.     dri_bo_unreference(render_state->vb.vertex_buffer);
  2290.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2291.                       "vertex buffer",
  2292.                       4096,
  2293.                       4096);
  2294.     assert(bo);
  2295.     render_state->vb.vertex_buffer = bo;
  2296.  
  2297.     /* WM */
  2298.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  2299.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2300.                       "surface state & binding table",
  2301.                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
  2302.                       4096);
  2303.     assert(bo);
  2304.     render_state->wm.surface_state_binding_table_bo = bo;
  2305.  
  2306.     dri_bo_unreference(render_state->wm.sampler);
  2307.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2308.                       "sampler state",
  2309.                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
  2310.                       4096);
  2311.     assert(bo);
  2312.     render_state->wm.sampler = bo;
  2313.     render_state->wm.sampler_count = 0;
  2314.  
  2315.     /* COLOR CALCULATOR */
  2316.     dri_bo_unreference(render_state->cc.state);
  2317.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2318.                       "color calc state",
  2319.                       sizeof(struct gen6_color_calc_state),
  2320.                       4096);
  2321.     assert(bo);
  2322.     render_state->cc.state = bo;
  2323.  
  2324.     /* CC VIEWPORT */
  2325.     dri_bo_unreference(render_state->cc.viewport);
  2326.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2327.                       "cc viewport",
  2328.                       sizeof(struct i965_cc_viewport),
  2329.                       4096);
  2330.     assert(bo);
  2331.     render_state->cc.viewport = bo;
  2332.  
  2333.     /* BLEND STATE */
  2334.     dri_bo_unreference(render_state->cc.blend);
  2335.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2336.                       "blend state",
  2337.                       sizeof(struct gen6_blend_state),
  2338.                       4096);
  2339.     assert(bo);
  2340.     render_state->cc.blend = bo;
  2341.  
  2342.     /* DEPTH & STENCIL STATE */
  2343.     dri_bo_unreference(render_state->cc.depth_stencil);
  2344.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2345.                       "depth & stencil state",
  2346.                       sizeof(struct gen6_depth_stencil_state),
  2347.                       4096);
  2348.     assert(bo);
  2349.     render_state->cc.depth_stencil = bo;
  2350. }
  2351.  
  2352. /*
  2353.  * for GEN8 (ALIGNMENT is referenced by the GEN8 paths further below)
  2354.  */
  2355. #define ALIGNMENT       64
  2356.  
  2357. static void
  2358. gen7_render_color_calc_state(VADriverContextP ctx)
  2359. {
  2360.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2361.     struct i965_render_state *render_state = &i965->render_state;
  2362.     struct gen6_color_calc_state *color_calc_state;
  2363.    
  2364.     dri_bo_map(render_state->cc.state, 1);
  2365.     assert(render_state->cc.state->virtual);
  2366.     color_calc_state = render_state->cc.state->virtual;
  2367.     memset(color_calc_state, 0, sizeof(*color_calc_state));
  2368.     color_calc_state->constant_r = 1.0;
  2369.     color_calc_state->constant_g = 0.0;
  2370.     color_calc_state->constant_b = 1.0;
  2371.     color_calc_state->constant_a = 1.0;
  2372.     dri_bo_unmap(render_state->cc.state);
  2373. }
  2374.  
  2375. static void
  2376. gen7_render_blend_state(VADriverContextP ctx)
  2377. {
  2378.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2379.     struct i965_render_state *render_state = &i965->render_state;
  2380.     struct gen6_blend_state *blend_state;
  2381.    
  2382.     dri_bo_map(render_state->cc.blend, 1);
  2383.     assert(render_state->cc.blend->virtual);
  2384.     blend_state = render_state->cc.blend->virtual;
  2385.     memset(blend_state, 0, sizeof(*blend_state));
  2386.     blend_state->blend1.logic_op_enable = 1;
  2387.     blend_state->blend1.logic_op_func = 0xc;
  2388.     blend_state->blend1.pre_blend_clamp_enable = 1;
  2389.     dri_bo_unmap(render_state->cc.blend);
  2390. }
  2391.  
  2392. static void
  2393. gen7_render_depth_stencil_state(VADriverContextP ctx)
  2394. {
  2395.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2396.     struct i965_render_state *render_state = &i965->render_state;
  2397.     struct gen6_depth_stencil_state *depth_stencil_state;
  2398.    
  2399.     dri_bo_map(render_state->cc.depth_stencil, 1);
  2400.     assert(render_state->cc.depth_stencil->virtual);
  2401.     depth_stencil_state = render_state->cc.depth_stencil->virtual;
  2402.     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
  2403.     dri_bo_unmap(render_state->cc.depth_stencil);
  2404. }
  2405.  
  2406. static void
  2407. gen7_render_sampler(VADriverContextP ctx)
  2408. {
  2409.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2410.     struct i965_render_state *render_state = &i965->render_state;
  2411.     struct gen7_sampler_state *sampler_state;
  2412.     int i;
  2413.    
  2414.     assert(render_state->wm.sampler_count > 0);
  2415.     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
  2416.  
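    /* Every sampler uses bilinear min/mag filtering with clamped texture
     * coordinates on all three axes. */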
  2417.     dri_bo_map(render_state->wm.sampler, 1);
  2418.     assert(render_state->wm.sampler->virtual);
  2419.     sampler_state = render_state->wm.sampler->virtual;
  2420.     for (i = 0; i < render_state->wm.sampler_count; i++) {
  2421.         memset(sampler_state, 0, sizeof(*sampler_state));
  2422.         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
  2423.         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
  2424.         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  2425.         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  2426.         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  2427.         sampler_state++;
  2428.     }
  2429.  
  2430.     dri_bo_unmap(render_state->wm.sampler);
  2431. }
  2432.  
  2433.  
  2434. static void
  2435. gen7_render_setup_states(
  2436.     VADriverContextP   ctx,
  2437.     struct object_surface *obj_surface,
  2438.     const VARectangle *src_rect,
  2439.     const VARectangle *dst_rect,
  2440.     unsigned int       flags
  2441. )
  2442. {
  2443.     i965_render_dest_surface_state(ctx, 0);
  2444.     i965_render_src_surfaces_state(ctx, obj_surface, flags);
  2445.     gen7_render_sampler(ctx);
  2446.     i965_render_cc_viewport(ctx);
  2447.     gen7_render_color_calc_state(ctx);
  2448.     gen7_render_blend_state(ctx);
  2449.     gen7_render_depth_stencil_state(ctx);
  2450.     i965_render_upload_constants(ctx, obj_surface, flags);
  2451.     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
  2452. }
  2453.  
  2454.  
  2455. static void
  2456. gen7_emit_invarient_states(VADriverContextP ctx)
  2457. {
  2458.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2459.     struct intel_batchbuffer *batch = i965->batch;
  2460.  
  2461.     BEGIN_BATCH(batch, 1);
  2462.     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
  2463.     ADVANCE_BATCH(batch);
  2464.  
  2465.     BEGIN_BATCH(batch, 4);
  2466.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
  2467.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
  2468.               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
  2469.     OUT_BATCH(batch, 0);
  2470.     OUT_BATCH(batch, 0);
  2471.     ADVANCE_BATCH(batch);
  2472.  
  2473.     BEGIN_BATCH(batch, 2);
  2474.     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
  2475.     OUT_BATCH(batch, 1);
  2476.     ADVANCE_BATCH(batch);
  2477.  
  2478.     /* Set system instruction pointer */
  2479.     BEGIN_BATCH(batch, 2);
  2480.     OUT_BATCH(batch, CMD_STATE_SIP | 0);
  2481.     OUT_BATCH(batch, 0);
  2482.     ADVANCE_BATCH(batch);
  2483. }
  2484.  
  2485. static void
  2486. gen7_emit_state_base_address(VADriverContextP ctx)
  2487. {
  2488.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2489.     struct intel_batchbuffer *batch = i965->batch;
  2490.     struct i965_render_state *render_state = &i965->render_state;
  2491.  
  2492.     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
  2493.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
  2494.     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
  2495.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
  2496.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
  2497.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
  2498.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
  2499.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
  2500.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
  2501.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
  2502. }
  2503.  
  2504. static void
  2505. gen7_emit_viewport_state_pointers(VADriverContextP ctx)
  2506. {
  2507.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2508.     struct intel_batchbuffer *batch = i965->batch;
  2509.     struct i965_render_state *render_state = &i965->render_state;
  2510.  
  2511.     BEGIN_BATCH(batch, 2);
  2512.     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
  2513.     OUT_RELOC(batch,
  2514.               render_state->cc.viewport,
  2515.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2516.               0);
  2517.     ADVANCE_BATCH(batch);
  2518.  
  2519.     BEGIN_BATCH(batch, 2);
  2520.     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
  2521.     OUT_BATCH(batch, 0);
  2522.     ADVANCE_BATCH(batch);
  2523. }
  2524.  
  2525. /*
  2526.  * URB layout on GEN7
  2527.  * ----------------------------------------
  2528.  * | PS Push Constants (8KB) | VS entries |
  2529.  * ----------------------------------------
  2530.  */
  2531. static void
  2532. gen7_emit_urb(VADriverContextP ctx)
  2533. {
  2534.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2535.     struct intel_batchbuffer *batch = i965->batch;
  2536.     unsigned int num_urb_entries = 32;
  2537.  
  2538.     if (IS_HASWELL(i965->intel.device_info))
  2539.         num_urb_entries = 64;
  2540.  
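    /*
     * The first 8KB of URB space is handed to PS push constants, so the
     * VS entries start one block in (starting addresses are in 8KB units).
     */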
  2541.     BEGIN_BATCH(batch, 2);
  2542.     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
  2543.     OUT_BATCH(batch, 8); /* in 1KBs */
  2544.     ADVANCE_BATCH(batch);
  2545.  
  2546.     BEGIN_BATCH(batch, 2);
  2547.     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
  2548.     OUT_BATCH(batch,
  2549.               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
  2550.               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
  2551.               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2552.     ADVANCE_BATCH(batch);
  2553.  
  2554.     BEGIN_BATCH(batch, 2);
  2555.     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
  2556.     OUT_BATCH(batch,
  2557.               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  2558.               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2559.     ADVANCE_BATCH(batch);
  2560.  
  2561.     BEGIN_BATCH(batch, 2);
  2562.     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
  2563.     OUT_BATCH(batch,
  2564.               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  2565.               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2566.     ADVANCE_BATCH(batch);
  2567.  
  2568.     BEGIN_BATCH(batch, 2);
  2569.     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
  2570.     OUT_BATCH(batch,
  2571.               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  2572.               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2573.     ADVANCE_BATCH(batch);
  2574. }
  2575.  
  2576. static void
  2577. gen7_emit_cc_state_pointers(VADriverContextP ctx)
  2578. {
  2579.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2580.     struct intel_batchbuffer *batch = i965->batch;
  2581.     struct i965_render_state *render_state = &i965->render_state;
  2582.  
  2583.     BEGIN_BATCH(batch, 2);
  2584.     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
  2585.     OUT_RELOC(batch,
  2586.               render_state->cc.state,
  2587.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2588.               1);
  2589.     ADVANCE_BATCH(batch);
  2590.  
  2591.     BEGIN_BATCH(batch, 2);
  2592.     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
  2593.     OUT_RELOC(batch,
  2594.               render_state->cc.blend,
  2595.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2596.               1);
  2597.     ADVANCE_BATCH(batch);
  2598.  
  2599.     BEGIN_BATCH(batch, 2);
  2600.     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
  2601.     OUT_RELOC(batch,
  2602.               render_state->cc.depth_stencil,
  2603.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2604.               1);
  2605.     ADVANCE_BATCH(batch);
  2606. }
  2607.  
  2608. static void
  2609. gen7_emit_sampler_state_pointers(VADriverContextP ctx)
  2610. {
  2611.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2612.     struct intel_batchbuffer *batch = i965->batch;
  2613.     struct i965_render_state *render_state = &i965->render_state;
  2614.  
  2615.     BEGIN_BATCH(batch, 2);
  2616.     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
  2617.     OUT_RELOC(batch,
  2618.               render_state->wm.sampler,
  2619.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2620.               0);
  2621.     ADVANCE_BATCH(batch);
  2622. }
  2623.  
  2624. static void
  2625. gen7_emit_binding_table(VADriverContextP ctx)
  2626. {
  2627.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2628.     struct intel_batchbuffer *batch = i965->batch;
  2629.  
  2630.     BEGIN_BATCH(batch, 2);
  2631.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
  2632.     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
  2633.     ADVANCE_BATCH(batch);
  2634. }
  2635.  
  2636. static void
  2637. gen7_emit_depth_buffer_state(VADriverContextP ctx)
  2638. {
  2639.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2640.     struct intel_batchbuffer *batch = i965->batch;
  2641.  
  2642.     BEGIN_BATCH(batch, 7);
  2643.     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
  2644.     OUT_BATCH(batch,
  2645.               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
  2646.               (I965_SURFACE_NULL << 29));
  2647.     OUT_BATCH(batch, 0);
  2648.     OUT_BATCH(batch, 0);
  2649.     OUT_BATCH(batch, 0);
  2650.     OUT_BATCH(batch, 0);
  2651.     OUT_BATCH(batch, 0);
  2652.     ADVANCE_BATCH(batch);
  2653.  
  2654.     BEGIN_BATCH(batch, 3);
  2655.     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
  2656.     OUT_BATCH(batch, 0);
  2657.     OUT_BATCH(batch, 0);
  2658.     ADVANCE_BATCH(batch);
  2659. }
  2660.  
  2661. static void
  2662. gen7_emit_drawing_rectangle(VADriverContextP ctx)
  2663. {
  2664.     i965_render_drawing_rectangle(ctx);
  2665. }
  2666.  
  2667. static void
  2668. gen7_emit_vs_state(VADriverContextP ctx)
  2669. {
  2670.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2671.     struct intel_batchbuffer *batch = i965->batch;
  2672.  
  2673.     /* disable VS constant buffer */
  2674.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
  2675.     OUT_BATCH(batch, 0);
  2676.     OUT_BATCH(batch, 0);
  2677.     OUT_BATCH(batch, 0);
  2678.     OUT_BATCH(batch, 0);
  2679.     OUT_BATCH(batch, 0);
  2680.     OUT_BATCH(batch, 0);
  2681.        
  2682.     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
  2683.     OUT_BATCH(batch, 0); /* without VS kernel */
  2684.     OUT_BATCH(batch, 0);
  2685.     OUT_BATCH(batch, 0);
  2686.     OUT_BATCH(batch, 0);
  2687.     OUT_BATCH(batch, 0); /* pass-through */
  2688. }
  2689.  
  2690. static void
  2691. gen7_emit_bypass_state(VADriverContextP ctx)
  2692. {
  2693.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2694.     struct intel_batchbuffer *batch = i965->batch;
  2695.  
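    /*
     * The rectangle pipeline needs only a pass-through VS and the PS, so
     * every stage in between (GS, HS, TE, DS, stream output) is
     * explicitly disabled here.
     */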
  2696.     /* bypass GS */
  2697.     BEGIN_BATCH(batch, 7);
  2698.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
  2699.     OUT_BATCH(batch, 0);
  2700.     OUT_BATCH(batch, 0);
  2701.     OUT_BATCH(batch, 0);
  2702.     OUT_BATCH(batch, 0);
  2703.     OUT_BATCH(batch, 0);
  2704.     OUT_BATCH(batch, 0);
  2705.     ADVANCE_BATCH(batch);
  2706.  
  2707.     BEGIN_BATCH(batch, 7);     
  2708.     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
  2709.     OUT_BATCH(batch, 0); /* without GS kernel */
  2710.     OUT_BATCH(batch, 0);
  2711.     OUT_BATCH(batch, 0);
  2712.     OUT_BATCH(batch, 0);
  2713.     OUT_BATCH(batch, 0);
  2714.     OUT_BATCH(batch, 0); /* pass-through */
  2715.     ADVANCE_BATCH(batch);
  2716.  
  2717.     BEGIN_BATCH(batch, 2);
  2718.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
  2719.     OUT_BATCH(batch, 0);
  2720.     ADVANCE_BATCH(batch);
  2721.  
  2722.     /* disable HS */
  2723.     BEGIN_BATCH(batch, 7);
  2724.     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
  2725.     OUT_BATCH(batch, 0);
  2726.     OUT_BATCH(batch, 0);
  2727.     OUT_BATCH(batch, 0);
  2728.     OUT_BATCH(batch, 0);
  2729.     OUT_BATCH(batch, 0);
  2730.     OUT_BATCH(batch, 0);
  2731.     ADVANCE_BATCH(batch);
  2732.  
  2733.     BEGIN_BATCH(batch, 7);
  2734.     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
  2735.     OUT_BATCH(batch, 0);
  2736.     OUT_BATCH(batch, 0);
  2737.     OUT_BATCH(batch, 0);
  2738.     OUT_BATCH(batch, 0);
  2739.     OUT_BATCH(batch, 0);
  2740.     OUT_BATCH(batch, 0);
  2741.     ADVANCE_BATCH(batch);
  2742.  
  2743.     BEGIN_BATCH(batch, 2);
  2744.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
  2745.     OUT_BATCH(batch, 0);
  2746.     ADVANCE_BATCH(batch);
  2747.  
  2748.     /* Disable TE */
  2749.     BEGIN_BATCH(batch, 4);
  2750.     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
  2751.     OUT_BATCH(batch, 0);
  2752.     OUT_BATCH(batch, 0);
  2753.     OUT_BATCH(batch, 0);
  2754.     ADVANCE_BATCH(batch);
  2755.  
  2756.     /* Disable DS */
  2757.     BEGIN_BATCH(batch, 7);
  2758.     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
  2759.     OUT_BATCH(batch, 0);
  2760.     OUT_BATCH(batch, 0);
  2761.     OUT_BATCH(batch, 0);
  2762.     OUT_BATCH(batch, 0);
  2763.     OUT_BATCH(batch, 0);
  2764.     OUT_BATCH(batch, 0);
  2765.     ADVANCE_BATCH(batch);
  2766.  
  2767.     BEGIN_BATCH(batch, 6);
  2768.     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
  2769.     OUT_BATCH(batch, 0);
  2770.     OUT_BATCH(batch, 0);
  2771.     OUT_BATCH(batch, 0);
  2772.     OUT_BATCH(batch, 0);
  2773.     OUT_BATCH(batch, 0);
  2774.     ADVANCE_BATCH(batch);
  2775.  
  2776.     BEGIN_BATCH(batch, 2);
  2777.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
  2778.     OUT_BATCH(batch, 0);
  2779.     ADVANCE_BATCH(batch);
  2780.  
  2781.     /* Disable STREAMOUT */
  2782.     BEGIN_BATCH(batch, 3);
  2783.     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
  2784.     OUT_BATCH(batch, 0);
  2785.     OUT_BATCH(batch, 0);
  2786.     ADVANCE_BATCH(batch);
  2787. }
  2788.  
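/*
 * The clip stage also runs in pass-through: the RECTLIST vertices are
 * emitted directly in destination window coordinates to fit the drawing
 * rectangle, so there is nothing left to clip.
 */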
  2789. static void
  2790. gen7_emit_clip_state(VADriverContextP ctx)
  2791. {
  2792.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2793.     struct intel_batchbuffer *batch = i965->batch;
  2794.  
  2795.     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
  2796.     OUT_BATCH(batch, 0);
  2797.     OUT_BATCH(batch, 0); /* pass-through */
  2798.     OUT_BATCH(batch, 0);
  2799. }
  2800.  
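/*
 * 3DSTATE_SBE routes a single attribute (the texture coordinate pair)
 * from the URB to the pixel shader: one output, URB entry read length 1,
 * read offset 0.  3DSTATE_SF disables culling and selects vertex 2 as the
 * tri-fan provoking vertex, the same setting the other generations' paths
 * in this file program.
 */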
  2801. static void
  2802. gen7_emit_sf_state(VADriverContextP ctx)
  2803. {
  2804.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2805.     struct intel_batchbuffer *batch = i965->batch;
  2806.  
  2807.     BEGIN_BATCH(batch, 14);
  2808.     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
  2809.     OUT_BATCH(batch,
  2810.               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
  2811.               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
  2812.               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
  2813.     OUT_BATCH(batch, 0);
  2814.     OUT_BATCH(batch, 0);
  2815.     OUT_BATCH(batch, 0); /* DW4 */
  2816.     OUT_BATCH(batch, 0);
  2817.     OUT_BATCH(batch, 0);
  2818.     OUT_BATCH(batch, 0);
  2819.     OUT_BATCH(batch, 0);
  2820.     OUT_BATCH(batch, 0); /* DW9 */
  2821.     OUT_BATCH(batch, 0);
  2822.     OUT_BATCH(batch, 0);
  2823.     OUT_BATCH(batch, 0);
  2824.     OUT_BATCH(batch, 0);
  2825.     ADVANCE_BATCH(batch);
  2826.  
  2827.     BEGIN_BATCH(batch, 7);
  2828.     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
  2829.     OUT_BATCH(batch, 0);
  2830.     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
  2831.     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
  2832.     OUT_BATCH(batch, 0);
  2833.     OUT_BATCH(batch, 0);
  2834.     OUT_BATCH(batch, 0);
  2835.     ADVANCE_BATCH(batch);
  2836. }
  2837.  
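/*
 * Pixel shader setup.  3DSTATE_WM enables PS dispatch with perspective
 * pixel barycentrics, 3DSTATE_CONSTANT_PS points the push-constant unit
 * at the CURBE buffer uploaded earlier, and 3DSTATE_PS binds the render
 * kernel selected by the caller (SIMD16 dispatch, five binding-table
 * entries).  The dispatch-start GRF of 6 is presumably what the
 * pre-assembled exa_wm_* kernels were built for.  Haswell widened the
 * max-thread-count field and requires an explicit sample mask, hence the
 * IS_HASWELL() special case inside the function.
 */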
  2838. static void
  2839. gen7_emit_wm_state(VADriverContextP ctx, int kernel)
  2840. {
  2841.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2842.     struct intel_batchbuffer *batch = i965->batch;
  2843.     struct i965_render_state *render_state = &i965->render_state;
  2844.     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
  2845.     unsigned int num_samples = 0;
  2846.  
  2847.     if (IS_HASWELL(i965->intel.device_info)) {
  2848.         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
  2849.         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
  2850.     }
  2851.  
  2852.     BEGIN_BATCH(batch, 3);
  2853.     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
  2854.     OUT_BATCH(batch,
  2855.               GEN7_WM_DISPATCH_ENABLE |
  2856.               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
  2857.     OUT_BATCH(batch, 0);
  2858.     ADVANCE_BATCH(batch);
  2859.  
  2860.     BEGIN_BATCH(batch, 7);
  2861.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
  2862.     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
  2863.     OUT_BATCH(batch, 0);
  2864.     OUT_RELOC(batch,
  2865.               render_state->curbe.bo,
  2866.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2867.               0);
  2868.     OUT_BATCH(batch, 0);
  2869.     OUT_BATCH(batch, 0);
  2870.     OUT_BATCH(batch, 0);
  2871.     ADVANCE_BATCH(batch);
  2872.  
  2873.     BEGIN_BATCH(batch, 8);
  2874.     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
  2875.     OUT_RELOC(batch,
  2876.               render_state->render_kernels[kernel].bo,
  2877.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2878.               0);
  2879.     OUT_BATCH(batch,
  2880.               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
  2881.               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
  2882.     OUT_BATCH(batch, 0); /* scratch space base offset */
  2883.     OUT_BATCH(batch,
  2884.               ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
  2885.               GEN7_PS_PUSH_CONSTANT_ENABLE |
  2886.               GEN7_PS_ATTRIBUTE_ENABLE |
  2887.               GEN7_PS_16_DISPATCH_ENABLE);
  2888.     OUT_BATCH(batch,
  2889.               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
  2890.     OUT_BATCH(batch, 0); /* kernel 1 pointer */
  2891.     OUT_BATCH(batch, 0); /* kernel 2 pointer */
  2892.     ADVANCE_BATCH(batch);
  2893. }
  2894.  
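/*
 * The two vertex elements below decode a 16-byte vertex: four floats,
 * position first and texture coordinate second (per the comments in the
 * function), with the .z/.w components synthesized as 1.0.  A hypothetical
 * C mirror of that layout, for illustration only; its size matches the
 * (4 * 4)-byte pitch programmed by gen7_emit_vertices():
 */
#if 0
struct render_vertex {
    float x, y;    /* element 0, offset 0: fetched as {X, Y, 1.0, 1.0} */
    float s0, t0;  /* element 1, offset 8: fetched as {S0, T0, 1.0, 1.0} */
};
#endif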
  2895. static void
  2896. gen7_emit_vertex_element_state(VADriverContextP ctx)
  2897. {
  2898.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2899.     struct intel_batchbuffer *batch = i965->batch;
  2900.  
  2901.     /* Set up our vertex elements, sourced from the single vertex buffer. */
  2902.     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
  2903.     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  2904.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2905.               GEN6_VE0_VALID |
  2906.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2907.               (0 << VE0_OFFSET_SHIFT));
  2908.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2909.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2910.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2911.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2912.     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  2913.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2914.               GEN6_VE0_VALID |
  2915.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2916.               (8 << VE0_OFFSET_SHIFT));
  2917.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2918.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2919.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2920.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2921. }
  2922.  
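/*
 * Bind the vertex buffer and kick the draw.  The buffer sits at index 0
 * with a 16-byte pitch; the second relocation supplies the end address at
 * byte offset 12 * 4 = 48, i.e. exactly the three 16-byte vertices that
 * the 3DPRIMITIVE below consumes (three vertices, one instance).  With
 * _3DPRIM_RECTLIST those three vertices describe the destination
 * rectangle.
 */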
  2923. static void
  2924. gen7_emit_vertices(VADriverContextP ctx)
  2925. {
  2926.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2927.     struct intel_batchbuffer *batch = i965->batch;
  2928.     struct i965_render_state *render_state = &i965->render_state;
  2929.  
  2930.     BEGIN_BATCH(batch, 5);
  2931.     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
  2932.     OUT_BATCH(batch,
  2933.               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
  2934.               GEN6_VB0_VERTEXDATA |
  2935.               GEN7_VB0_ADDRESS_MODIFYENABLE |
  2936.               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
  2937.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
  2938.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
  2939.     OUT_BATCH(batch, 0);
  2940.     ADVANCE_BATCH(batch);
  2941.  
  2942.     BEGIN_BATCH(batch, 7);
  2943.     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
  2944.     OUT_BATCH(batch,
  2945.               _3DPRIM_RECTLIST |
  2946.               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
  2947.     OUT_BATCH(batch, 3); /* vertex count per instance */
  2948.     OUT_BATCH(batch, 0); /* start vertex offset */
  2949.     OUT_BATCH(batch, 1); /* single instance */
  2950.     OUT_BATCH(batch, 0); /* start instance location */
  2951.     OUT_BATCH(batch, 0);
  2952.     ADVANCE_BATCH(batch);
  2953. }
  2954.  
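/*
 * Top-level state emission for a Gen7 render pass.  The whole sequence
 * runs inside an atomic batch section sized 0x1000 bytes, which both
 * keeps it in a single batch and pre-reserves the space that the
 * unguarded emitters above depend on; the MI flush at the start
 * serializes against whatever was in the pipe before the state is
 * rebuilt.  The emit order roughly follows the 3D pipeline from front
 * (state base, URB, VS) to back (WM, depth, vertices).
 */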
  2955. static void
  2956. gen7_render_emit_states(VADriverContextP ctx, int kernel)
  2957. {
  2958.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2959.     struct intel_batchbuffer *batch = i965->batch;
  2960.  
  2961.     intel_batchbuffer_start_atomic(batch, 0x1000);
  2962.     intel_batchbuffer_emit_mi_flush(batch);
  2963.     gen7_emit_invarient_states(ctx);
  2964.     gen7_emit_state_base_address(ctx);
  2965.     gen7_emit_viewport_state_pointers(ctx);
  2966.     gen7_emit_urb(ctx);
  2967.     gen7_emit_cc_state_pointers(ctx);
  2968.     gen7_emit_sampler_state_pointers(ctx);
  2969.     gen7_emit_bypass_state(ctx);
  2970.     gen7_emit_vs_state(ctx);
  2971.     gen7_emit_clip_state(ctx);
  2972.     gen7_emit_sf_state(ctx);
  2973.     gen7_emit_wm_state(ctx, kernel);
  2974.     gen7_emit_binding_table(ctx);
  2975.     gen7_emit_depth_buffer_state(ctx);
  2976.     gen7_emit_drawing_rectangle(ctx);
  2977.     gen7_emit_vertex_element_state(ctx);
  2978.     gen7_emit_vertices(ctx);
  2979.     intel_batchbuffer_end_atomic(batch);
  2980. }
  2981.  
  2982.  
  2983. static void
  2984. gen7_render_put_surface(
  2985.     VADriverContextP   ctx,
  2986.     struct object_surface *obj_surface,
  2987.     const VARectangle *src_rect,
  2988.     const VARectangle *dst_rect,
  2989.     unsigned int       flags
  2990. )
  2991. {
  2992.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2993.     struct intel_batchbuffer *batch = i965->batch;
  2994.  
  2995.     gen7_render_initialize(ctx);
  2996.     gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
  2997.     i965_clear_dest_region(ctx);
  2998.     gen7_render_emit_states(ctx, PS_KERNEL);
  2999.     intel_batchbuffer_flush(batch);
  3000. }
  3001.  
  3002.  
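/*
 * Blend state for compositing an ARGB subpicture over the video surface.
 * With SRC_ALPHA/INV_SRC_ALPHA factors and an ADD blend function this is
 * standard source-over blending:
 *
 *     dst' = src.alpha * src + (1 - src.alpha) * dst
 *
 * with pre- and post-blend clamping to the [0, 1] range.
 */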
  3003. static void
  3004. gen7_subpicture_render_blend_state(VADriverContextP ctx)
  3005. {
  3006.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3007.     struct i965_render_state *render_state = &i965->render_state;
  3008.     struct gen6_blend_state *blend_state;
  3009.  
  3010.     dri_bo_unmap(render_state->cc.state);
  3011.     dri_bo_map(render_state->cc.blend, 1);
  3012.     assert(render_state->cc.blend->virtual);
  3013.     blend_state = render_state->cc.blend->virtual;
  3014.     memset(blend_state, 0, sizeof(*blend_state));
  3015.     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
  3016.     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
  3017.     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
  3018.     blend_state->blend0.blend_enable = 1;
  3019.     blend_state->blend1.post_blend_clamp_enable = 1;
  3020.     blend_state->blend1.pre_blend_clamp_enable = 1;
  3021.     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
  3022.     dri_bo_unmap(render_state->cc.blend);
  3023. }
  3024.  
  3025. static void
  3026. gen7_subpicture_render_setup_states(
  3027.     VADriverContextP   ctx,
  3028.     struct object_surface *obj_surface,
  3029.     const VARectangle *src_rect,
  3030.     const VARectangle *dst_rect
  3031. )
  3032. {
  3033.     i965_render_dest_surface_state(ctx, 0);
  3034.     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
  3035.     i965_render_sampler(ctx);
  3036.     i965_render_cc_viewport(ctx);
  3037.     gen7_render_color_calc_state(ctx);
  3038.     gen7_subpicture_render_blend_state(ctx);
  3039.     gen7_render_depth_stencil_state(ctx);
  3040.     i965_subpic_render_upload_constants(ctx, obj_surface);
  3041.     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
  3042. }
  3043.  
  3044. static void
  3045. gen7_render_put_subpicture(
  3046.     VADriverContextP   ctx,
  3047.     struct object_surface *obj_surface,
  3048.     const VARectangle *src_rect,
  3049.     const VARectangle *dst_rect
  3050. )
  3051. {
  3052.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3053.     struct intel_batchbuffer *batch = i965->batch;
  3054.     unsigned int index = obj_surface->subpic_render_idx;
  3055.     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
  3056.  
  3057.     assert(obj_subpic);
  3058.     gen7_render_initialize(ctx);
  3059.     gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
  3060.     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
  3061.     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
  3062.     intel_batchbuffer_flush(batch);
  3063. }
  3064.  
  3065.  
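/*
 * Generation-independent entry point.  Post-processing may hand back a
 * new, already-scaled surface; when it does, the put-surface hook below
 * reads from that surface with the calibrated source rectangle instead,
 * and the temporary surface is destroyed once the blit is done so it is
 * not leaked.
 */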
  3066. void
  3067. intel_render_put_surface(
  3068.     VADriverContextP   ctx,
  3069.     struct object_surface *obj_surface,
  3070.     const VARectangle *src_rect,
  3071.     const VARectangle *dst_rect,
  3072.     unsigned int       flags
  3073. )
  3074. {
  3075.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3076.     struct i965_render_state *render_state = &i965->render_state;
  3077.     int has_done_scaling = 0;
  3078.     VARectangle calibrated_rect;
  3079.     VASurfaceID out_surface_id = i965_post_processing(ctx,
  3080.                                                       obj_surface,
  3081.                                                       src_rect,
  3082.                                                       dst_rect,
  3083.                                                       flags,
  3084.                                                       &has_done_scaling,
  3085.                                                       &calibrated_rect);
  3086.  
  3087.     assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
  3088.  
  3089.     if (out_surface_id != VA_INVALID_ID) {
  3090.         struct object_surface *new_obj_surface = SURFACE(out_surface_id);
  3091.
  3092.         if (new_obj_surface && new_obj_surface->bo)
  3093.             obj_surface = new_obj_surface;
  3094.  
  3095.         if (has_done_scaling)
  3096.             src_rect = &calibrated_rect;
  3097.     }
  3098.  
  3099.     render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
  3100.  
  3101.     if (out_surface_id != VA_INVALID_ID)
  3102.         i965_DestroySurfaces(ctx, &out_surface_id, 1);
  3103. }
  3104.  
  3105. void
  3106. intel_render_put_subpicture(
  3107.     VADriverContextP   ctx,
  3108.     struct object_surface *obj_surface,
  3109.     const VARectangle *src_rect,
  3110.     const VARectangle *dst_rect
  3111. )
  3112. {
  3113.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3114.     struct i965_render_state *render_state = &i965->render_state;
  3115.  
  3116.     render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
  3117. }
  3118.  
  3119. static void
  3120. genx_render_terminate(VADriverContextP ctx)
  3121. {
  3122.     int i;
  3123.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3124.     struct i965_render_state *render_state = &i965->render_state;
  3125.  
  3126.     dri_bo_unreference(render_state->curbe.bo);
  3127.     render_state->curbe.bo = NULL;
  3128.  
  3129.     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
  3130.         struct i965_kernel *kernel = &render_state->render_kernels[i];
  3131.  
  3132.         dri_bo_unreference(kernel->bo);
  3133.         kernel->bo = NULL;
  3134.     }
  3135.  
  3136.     dri_bo_unreference(render_state->vb.vertex_buffer);
  3137.     render_state->vb.vertex_buffer = NULL;
  3138.     dri_bo_unreference(render_state->vs.state);
  3139.     render_state->vs.state = NULL;
  3140.     dri_bo_unreference(render_state->sf.state);
  3141.     render_state->sf.state = NULL;
  3142.     dri_bo_unreference(render_state->wm.sampler);
  3143.     render_state->wm.sampler = NULL;
  3144.     dri_bo_unreference(render_state->wm.state);
  3145.     render_state->wm.state = NULL;
  3146.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
            render_state->wm.surface_state_binding_table_bo = NULL; /* NULL the pointer like its siblings */
  3147.     dri_bo_unreference(render_state->cc.viewport);
  3148.     render_state->cc.viewport = NULL;
  3149.     dri_bo_unreference(render_state->cc.state);
  3150.     render_state->cc.state = NULL;
  3151.     dri_bo_unreference(render_state->cc.blend);
  3152.     render_state->cc.blend = NULL;
  3153.     dri_bo_unreference(render_state->cc.depth_stencil);
  3154.     render_state->cc.depth_stencil = NULL;
  3155.  
  3156.     if (render_state->draw_region) {
  3157.         dri_bo_unreference(render_state->draw_region->bo);
  3158.         free(render_state->draw_region);
  3159.         render_state->draw_region = NULL;
  3160.     }
  3161. }
  3162.  
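/*
 * One-time render-state setup: pick the kernel table and the
 * put-surface/put-subpicture hooks for the detected GPU generation,
 * upload each pre-assembled kernel into its own page-aligned buffer
 * object, and allocate the 4 KiB CURBE buffer that backs the push
 * constants.
 */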
  3163. bool
  3164. genx_render_init(VADriverContextP ctx)
  3165. {
  3166.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3167.     struct i965_render_state *render_state = &i965->render_state;
  3168.     int i;
  3169.  
  3170.     /* kernel */
  3171.     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
  3172.                                  sizeof(render_kernels_gen5[0])));
  3173.     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
  3174.                                  sizeof(render_kernels_gen6[0])));
  3175.  
  3176.     if (IS_GEN7(i965->intel.device_info)) {
  3177.         memcpy(render_state->render_kernels,
  3178.                (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
  3179.                sizeof(render_state->render_kernels));
  3180.         render_state->render_put_surface = gen7_render_put_surface;
  3181.         render_state->render_put_subpicture = gen7_render_put_subpicture;
  3182.     } else if (IS_GEN6(i965->intel.device_info)) {
  3183.         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
  3184.         render_state->render_put_surface = gen6_render_put_surface;
  3185.         render_state->render_put_subpicture = gen6_render_put_subpicture;
  3186.     } else if (IS_IRONLAKE(i965->intel.device_info)) {
  3187.         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
  3188.         render_state->render_put_surface = i965_render_put_surface;
  3189.         render_state->render_put_subpicture = i965_render_put_subpicture;
  3190.     } else {
  3191.         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
  3192.         render_state->render_put_surface = i965_render_put_surface;
  3193.         render_state->render_put_subpicture = i965_render_put_subpicture;
  3194.     }
  3195.  
  3196.     render_state->render_terminate = genx_render_terminate;
  3197.  
  3198.     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
  3199.         struct i965_kernel *kernel = &render_state->render_kernels[i];
  3200.  
  3201.         if (!kernel->size)
  3202.             continue;
  3203.  
  3204.         kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
  3205.                                   kernel->name,
  3206.                                   kernel->size, 0x1000);
  3207.         assert(kernel->bo);
  3208.         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
  3209.     }
  3210.  
  3211.     /* constant buffer */
  3212.     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
  3213.                                           "constant buffer",
  3214.                                           4096, 64);
  3215.     assert(render_state->curbe.bo);
  3216.  
  3217.     return true;
  3218. }
  3219.  
  3220. bool
  3221. i965_render_init(VADriverContextP ctx)
  3222. {
  3223.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3224.  
  3225.     return i965->codec_info->render_init(ctx);
  3226. }
  3227.  
  3228. void
  3229. i965_render_terminate(VADriverContextP ctx)
  3230. {
  3231.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3232.     struct i965_render_state *render_state = &i965->render_state;
  3233.  
  3234.     render_state->render_terminate(ctx);
  3235. }
  3236.