  1. /*
  2.  * Copyright © 2006 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *    Keith Packard <keithp@keithp.com>
  26.  *    Xiang Haihao <haihao.xiang@intel.com>
  27.  *
  28.  */
  29.  
  30. /*
  31.  * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
  32.  */
  33.  
  34. #include <stdio.h>
  35. #include <stdlib.h>
  36. #include <string.h>
  37. #include <assert.h>
  38.  
  39. #include "intel_batchbuffer.h"
  40. #include "intel_driver.h"
  41. #include "i965_defines.h"
  42. #include "i965_drv_video.h"
  43. #include "i965_structs.h"
  44.  
  45. #include "i965_render.h"
  46.  
  47. #define SF_KERNEL_NUM_GRF       16
  48. #define SF_MAX_THREADS          1
  49.  
  50. static const uint32_t sf_kernel_static[][4] =
  51. {
  52. #include "shaders/render/exa_sf.g4b"
  53. };
  54.  
  55. #define PS_KERNEL_NUM_GRF       32
  56. #define PS_MAX_THREADS          32
  57.  
  58. #define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)
  59.  
  60. static const uint32_t ps_kernel_static[][4] =
  61. {
  62. #include "shaders/render/exa_wm_xy.g4b"
  63. #include "shaders/render/exa_wm_src_affine.g4b"
  64. #include "shaders/render/exa_wm_src_sample_planar.g4b"
  65. #include "shaders/render/exa_wm_yuv_rgb.g4b"
  66. #include "shaders/render/exa_wm_write.g4b"
  67. };
  68. static const uint32_t ps_subpic_kernel_static[][4] =
  69. {
  70. #include "shaders/render/exa_wm_xy.g4b"
  71. #include "shaders/render/exa_wm_src_affine.g4b"
  72. #include "shaders/render/exa_wm_src_sample_argb.g4b"
  73. #include "shaders/render/exa_wm_write.g4b"
  74. };
  75.  
  76. /* programs for Ironlake */
  77. static const uint32_t sf_kernel_static_gen5[][4] =
  78. {
  79. #include "shaders/render/exa_sf.g4b.gen5"
  80. };
  81.  
  82. static const uint32_t ps_kernel_static_gen5[][4] =
  83. {
  84. #include "shaders/render/exa_wm_xy.g4b.gen5"
  85. #include "shaders/render/exa_wm_src_affine.g4b.gen5"
  86. #include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
  87. #include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
  88. #include "shaders/render/exa_wm_write.g4b.gen5"
  89. };
  90. static const uint32_t ps_subpic_kernel_static_gen5[][4] =
  91. {
  92. #include "shaders/render/exa_wm_xy.g4b.gen5"
  93. #include "shaders/render/exa_wm_src_affine.g4b.gen5"
  94. #include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
  95. #include "shaders/render/exa_wm_write.g4b.gen5"
  96. };
  97.  
  98. /* programs for Sandybridge */
  99. static const uint32_t sf_kernel_static_gen6[][4] =
  100. {
  101. };
  102.  
  103. static const uint32_t ps_kernel_static_gen6[][4] = {
  104. #include "shaders/render/exa_wm_src_affine.g6b"
  105. #include "shaders/render/exa_wm_src_sample_planar.g6b"
  106. #include "shaders/render/exa_wm_yuv_rgb.g6b"
  107. #include "shaders/render/exa_wm_write.g6b"
  108. };
  109.  
  110. static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
  111. #include "shaders/render/exa_wm_src_affine.g6b"
  112. #include "shaders/render/exa_wm_src_sample_argb.g6b"
  113. #include "shaders/render/exa_wm_write.g6b"
  114. };
  115.  
  116. /* programs for Ivybridge */
  117. static const uint32_t sf_kernel_static_gen7[][4] =
  118. {
  119. };
  120.  
  121. static const uint32_t ps_kernel_static_gen7[][4] = {
  122. #include "shaders/render/exa_wm_src_affine.g7b"
  123. #include "shaders/render/exa_wm_src_sample_planar.g7b"
  124. #include "shaders/render/exa_wm_yuv_rgb.g7b"
  125. #include "shaders/render/exa_wm_write.g7b"
  126. };
  127.  
  128. static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
  129. #include "shaders/render/exa_wm_src_affine.g7b"
  130. #include "shaders/render/exa_wm_src_sample_argb.g7b"
  131. #include "shaders/render/exa_wm_write.g7b"
  132. };
  133.  
  134. /* programs for Haswell */
  135. static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
  136. #include "shaders/render/exa_wm_src_affine.g7b"
  137. #include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
  138. #include "shaders/render/exa_wm_yuv_rgb.g7b"
  139. #include "shaders/render/exa_wm_write.g7b"
  140. };
  141.  
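/*
 * Surface states and the binding table share a single buffer object:
 * MAX_RENDER_SURFACES padded surface-state blocks come first, followed by
 * the binding table whose entries point back at those blocks.
 */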
  142. #define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
  143. #define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
  144. #define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
  145. #define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
  146. #define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
  147.  
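/* Reinterpret a float's bit pattern as a uint32_t so it can be emitted
 * directly into the command stream (see i965_render_constant_color()). */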
  148. static uint32_t float_to_uint (float f)
  149. {
  150.     union {
  151.         uint32_t i;
  152.         float f;
  153.     } x;
  154.  
  155.     x.f = f;
  156.     return x.i;
  157. }
  158.  
  159. enum
  160. {
  161.     SF_KERNEL = 0,
  162.     PS_KERNEL,
  163.     PS_SUBPIC_KERNEL
  164. };
  165.  
  166. static struct i965_kernel render_kernels_gen4[] = {
  167.     {
  168.         "SF",
  169.         SF_KERNEL,
  170.         sf_kernel_static,
  171.         sizeof(sf_kernel_static),
  172.         NULL
  173.     },
  174.     {
  175.         "PS",
  176.         PS_KERNEL,
  177.         ps_kernel_static,
  178.         sizeof(ps_kernel_static),
  179.         NULL
  180.     },
  181.  
  182.     {
  183.         "PS_SUBPIC",
  184.         PS_SUBPIC_KERNEL,
  185.         ps_subpic_kernel_static,
  186.         sizeof(ps_subpic_kernel_static),
  187.         NULL
  188.     }
  189. };
  190.  
  191. static struct i965_kernel render_kernels_gen5[] = {
  192.     {
  193.         "SF",
  194.         SF_KERNEL,
  195.         sf_kernel_static_gen5,
  196.         sizeof(sf_kernel_static_gen5),
  197.         NULL
  198.     },
  199.     {
  200.         "PS",
  201.         PS_KERNEL,
  202.         ps_kernel_static_gen5,
  203.         sizeof(ps_kernel_static_gen5),
  204.         NULL
  205.     },
  206.  
  207.     {
  208.         "PS_SUBPIC",
  209.         PS_SUBPIC_KERNEL,
  210.         ps_subpic_kernel_static_gen5,
  211.         sizeof(ps_subpic_kernel_static_gen5),
  212.         NULL
  213.     }
  214. };
  215.  
  216. static struct i965_kernel render_kernels_gen6[] = {
  217.     {
  218.         "SF",
  219.         SF_KERNEL,
  220.         sf_kernel_static_gen6,
  221.         sizeof(sf_kernel_static_gen6),
  222.         NULL
  223.     },
  224.     {
  225.         "PS",
  226.         PS_KERNEL,
  227.         ps_kernel_static_gen6,
  228.         sizeof(ps_kernel_static_gen6),
  229.         NULL
  230.     },
  231.  
  232.     {
  233.         "PS_SUBPIC",
  234.         PS_SUBPIC_KERNEL,
  235.         ps_subpic_kernel_static_gen6,
  236.         sizeof(ps_subpic_kernel_static_gen6),
  237.         NULL
  238.     }
  239. };
  240.  
  241. static struct i965_kernel render_kernels_gen7[] = {
  242.     {
  243.         "SF",
  244.         SF_KERNEL,
  245.         sf_kernel_static_gen7,
  246.         sizeof(sf_kernel_static_gen7),
  247.         NULL
  248.     },
  249.     {
  250.         "PS",
  251.         PS_KERNEL,
  252.         ps_kernel_static_gen7,
  253.         sizeof(ps_kernel_static_gen7),
  254.         NULL
  255.     },
  256.  
  257.     {
  258.         "PS_SUBPIC",
  259.         PS_SUBPIC_KERNEL,
  260.         ps_subpic_kernel_static_gen7,
  261.         sizeof(ps_subpic_kernel_static_gen7),
  262.         NULL
  263.     }
  264. };
  265.  
  266. static struct i965_kernel render_kernels_gen7_haswell[] = {
  267.     {
  268.         "SF",
  269.         SF_KERNEL,
  270.         sf_kernel_static_gen7,
  271.         sizeof(sf_kernel_static_gen7),
  272.         NULL
  273.     },
  274.     {
  275.         "PS",
  276.         PS_KERNEL,
  277.         ps_kernel_static_gen7_haswell,
  278.         sizeof(ps_kernel_static_gen7_haswell),
  279.         NULL
  280.     },
  281.  
  282.     {
  283.         "PS_SUBPIC",
  284.         PS_SUBPIC_KERNEL,
  285.         ps_subpic_kernel_static_gen7,
  286.         sizeof(ps_subpic_kernel_static_gen7),
  287.         NULL
  288.     }
  289. };
  290.  
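/* Fixed URB partitioning for the fixed-function units; GS and CLIP are
 * disabled in i965_render_pipelined_pointers(), so they get no entries. */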
  291. #define URB_VS_ENTRIES        8
  292. #define URB_VS_ENTRY_SIZE     1
  293.  
  294. #define URB_GS_ENTRIES        0
  295. #define URB_GS_ENTRY_SIZE     0
  296.  
  297. #define URB_CLIP_ENTRIES      0
  298. #define URB_CLIP_ENTRY_SIZE   0
  299.  
  300. #define URB_SF_ENTRIES        1
  301. #define URB_SF_ENTRY_SIZE     2
  302.  
  303. #define URB_CS_ENTRIES        1
  304. #define URB_CS_ENTRY_SIZE     1
  305.  
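/* The VS unit is programmed as pass-through: vertex shading is disabled and
 * only the URB entry allocation is filled in. */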
  306. static void
  307. i965_render_vs_unit(VADriverContextP ctx)
  308. {
  309.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  310.     struct i965_render_state *render_state = &i965->render_state;
  311.     struct i965_vs_unit_state *vs_state;
  312.  
  313.     dri_bo_map(render_state->vs.state, 1);
  314.     assert(render_state->vs.state->virtual);
  315.     vs_state = render_state->vs.state->virtual;
  316.     memset(vs_state, 0, sizeof(*vs_state));
  317.  
  318.     if (IS_IRONLAKE(i965->intel.device_id))
  319.         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
  320.     else
  321.         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
  322.  
  323.     vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
  324.     vs_state->vs6.vs_enable = 0;
  325.     vs_state->vs6.vert_cache_disable = 1;
  326.    
  327.     dri_bo_unmap(render_state->vs.state);
  328. }
  329.  
  330. static void
  331. i965_render_sf_unit(VADriverContextP ctx)
  332. {
  333.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  334.     struct i965_render_state *render_state = &i965->render_state;
  335.     struct i965_sf_unit_state *sf_state;
  336.  
  337.     dri_bo_map(render_state->sf.state, 1);
  338.     assert(render_state->sf.state->virtual);
  339.     sf_state = render_state->sf.state->virtual;
  340.     memset(sf_state, 0, sizeof(*sf_state));
  341.  
  342.     sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
  343.     sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;
  344.  
  345.     sf_state->sf1.single_program_flow = 1; /* XXX */
  346.     sf_state->sf1.binding_table_entry_count = 0;
  347.     sf_state->sf1.thread_priority = 0;
  348.     sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
  349.     sf_state->sf1.illegal_op_exception_enable = 1;
  350.     sf_state->sf1.mask_stack_exception_enable = 1;
  351.     sf_state->sf1.sw_exception_enable = 1;
  352.  
  353.     /* scratch space is not used in our kernel */
  354.     sf_state->thread2.per_thread_scratch_space = 0;
  355.     sf_state->thread2.scratch_space_base_pointer = 0;
  356.  
  357.     sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
  358.     sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
  359.     sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
  360.     sf_state->thread3.urb_entry_read_offset = 0;
  361.     sf_state->thread3.dispatch_grf_start_reg = 3;
  362.  
  363.     sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
  364.     sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
  365.     sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
  366.     sf_state->thread4.stats_enable = 1;
  367.  
  368.     sf_state->sf5.viewport_transform = 0; /* skip viewport */
  369.  
  370.     sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
  371.     sf_state->sf6.scissor = 0;
  372.  
  373.     sf_state->sf7.trifan_pv = 2;
  374.  
  375.     sf_state->sf6.dest_org_vbias = 0x8;
  376.     sf_state->sf6.dest_org_hbias = 0x8;
  377.  
  378.     dri_bo_emit_reloc(render_state->sf.state,
  379.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  380.                       sf_state->thread0.grf_reg_count << 1,
  381.                       offsetof(struct i965_sf_unit_state, thread0),
  382.                       render_state->render_kernels[SF_KERNEL].bo);
  383.  
  384.     dri_bo_unmap(render_state->sf.state);
  385. }
  386.  
  387. static void
  388. i965_render_sampler(VADriverContextP ctx)
  389. {
  390.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  391.     struct i965_render_state *render_state = &i965->render_state;
  392.     struct i965_sampler_state *sampler_state;
  393.     int i;
  394.    
  395.     assert(render_state->wm.sampler_count > 0);
  396.     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
  397.  
  398.     dri_bo_map(render_state->wm.sampler, 1);
  399.     assert(render_state->wm.sampler->virtual);
  400.     sampler_state = render_state->wm.sampler->virtual;
  401.     for (i = 0; i < render_state->wm.sampler_count; i++) {
  402.         memset(sampler_state, 0, sizeof(*sampler_state));
  403.         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
  404.         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
  405.         sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  406.         sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  407.         sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  408.         sampler_state++;
  409.     }
  410.  
  411.     dri_bo_unmap(render_state->wm.sampler);
  412. }
  413. static void
  414. i965_subpic_render_wm_unit(VADriverContextP ctx)
  415. {
  416.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  417.     struct i965_render_state *render_state = &i965->render_state;
  418.     struct i965_wm_unit_state *wm_state;
  419.  
  420.     assert(render_state->wm.sampler);
  421.  
  422.     dri_bo_map(render_state->wm.state, 1);
  423.     assert(render_state->wm.state->virtual);
  424.     wm_state = render_state->wm.state->virtual;
  425.     memset(wm_state, 0, sizeof(*wm_state));
  426.  
  427.     wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
  428.     wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;
  429.  
  430.     wm_state->thread1.single_program_flow = 1; /* XXX */
  431.  
  432.     if (IS_IRONLAKE(i965->intel.device_id))
  433.         wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
  434.     else
  435.         wm_state->thread1.binding_table_entry_count = 7;
  436.  
  437.     wm_state->thread2.scratch_space_base_pointer = 0;
  438.     wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
  439.  
  440.     wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
  441.     wm_state->thread3.const_urb_entry_read_length = 0;
  442.     wm_state->thread3.const_urb_entry_read_offset = 0;
  443.     wm_state->thread3.urb_entry_read_length = 1; /* XXX */
  444.     wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
  445.  
  446.     wm_state->wm4.stats_enable = 0;
  447.     wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;
  448.  
  449.     if (IS_IRONLAKE(i965->intel.device_id)) {
  450.         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
  451.     } else {
  452.         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
  453.     }
  454.  
  455.     wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
  456.     wm_state->wm5.thread_dispatch_enable = 1;
  457.     wm_state->wm5.enable_16_pix = 1;
  458.     wm_state->wm5.enable_8_pix = 0;
  459.     wm_state->wm5.early_depth_test = 1;
  460.  
  461.     dri_bo_emit_reloc(render_state->wm.state,
  462.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  463.                       wm_state->thread0.grf_reg_count << 1,
  464.                       offsetof(struct i965_wm_unit_state, thread0),
  465.                       render_state->render_kernels[PS_SUBPIC_KERNEL].bo);
  466.  
  467.     dri_bo_emit_reloc(render_state->wm.state,
  468.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  469.                       wm_state->wm4.sampler_count << 2,
  470.                       offsetof(struct i965_wm_unit_state, wm4),
  471.                       render_state->wm.sampler);
  472.  
  473.     dri_bo_unmap(render_state->wm.state);
  474. }
  475.  
  476.  
  477. static void
  478. i965_render_wm_unit(VADriverContextP ctx)
  479. {
  480.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  481.     struct i965_render_state *render_state = &i965->render_state;
  482.     struct i965_wm_unit_state *wm_state;
  483.  
  484.     assert(render_state->wm.sampler);
  485.  
  486.     dri_bo_map(render_state->wm.state, 1);
  487.     assert(render_state->wm.state->virtual);
  488.     wm_state = render_state->wm.state->virtual;
  489.     memset(wm_state, 0, sizeof(*wm_state));
  490.  
  491.     wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
  492.     wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;
  493.  
  494.     wm_state->thread1.single_program_flow = 1; /* XXX */
  495.  
  496.     if (IS_IRONLAKE(i965->intel.device_id))
  497.         wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
  498.     else
  499.         wm_state->thread1.binding_table_entry_count = 7;
  500.  
  501.     wm_state->thread2.scratch_space_base_pointer = 0;
  502.     wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
  503.  
  504.     wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
  505.     wm_state->thread3.const_urb_entry_read_length = 1;
  506.     wm_state->thread3.const_urb_entry_read_offset = 0;
  507.     wm_state->thread3.urb_entry_read_length = 1; /* XXX */
  508.     wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
  509.  
  510.     wm_state->wm4.stats_enable = 0;
  511.     wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;
  512.  
  513.     if (IS_IRONLAKE(i965->intel.device_id)) {
  514.         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
  515.     } else {
  516.         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
  517.     }
  518.  
  519.     wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
  520.     wm_state->wm5.thread_dispatch_enable = 1;
  521.     wm_state->wm5.enable_16_pix = 1;
  522.     wm_state->wm5.enable_8_pix = 0;
  523.     wm_state->wm5.early_depth_test = 1;
  524.  
  525.     dri_bo_emit_reloc(render_state->wm.state,
  526.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  527.                       wm_state->thread0.grf_reg_count << 1,
  528.                       offsetof(struct i965_wm_unit_state, thread0),
  529.                       render_state->render_kernels[PS_KERNEL].bo);
  530.  
  531.     dri_bo_emit_reloc(render_state->wm.state,
  532.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  533.                       wm_state->wm4.sampler_count << 2,
  534.                       offsetof(struct i965_wm_unit_state, wm4),
  535.                       render_state->wm.sampler);
  536.  
  537.     dri_bo_unmap(render_state->wm.state);
  538. }
  539.  
  540. static void
  541. i965_render_cc_viewport(VADriverContextP ctx)
  542. {
  543.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  544.     struct i965_render_state *render_state = &i965->render_state;
  545.     struct i965_cc_viewport *cc_viewport;
  546.  
  547.     dri_bo_map(render_state->cc.viewport, 1);
  548.     assert(render_state->cc.viewport->virtual);
  549.     cc_viewport = render_state->cc.viewport->virtual;
  550.     memset(cc_viewport, 0, sizeof(*cc_viewport));
  551.    
  552.     cc_viewport->min_depth = -1.e35;
  553.     cc_viewport->max_depth = 1.e35;
  554.  
  555.     dri_bo_unmap(render_state->cc.viewport);
  556. }
  557.  
  558. static void
  559. i965_subpic_render_cc_unit(VADriverContextP ctx)
  560. {
  561.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  562.     struct i965_render_state *render_state = &i965->render_state;
  563.     struct i965_cc_unit_state *cc_state;
  564.  
  565.     assert(render_state->cc.viewport);
  566.  
  567.     dri_bo_map(render_state->cc.state, 1);
  568.     assert(render_state->cc.state->virtual);
  569.     cc_state = render_state->cc.state->virtual;
  570.     memset(cc_state, 0, sizeof(*cc_state));
  571.  
  572.     cc_state->cc0.stencil_enable = 0;   /* disable stencil */
  573.     cc_state->cc2.depth_test = 0;       /* disable depth test */
  574.     cc_state->cc2.logicop_enable = 0;   /* disable logic op */
  575.     cc_state->cc3.ia_blend_enable = 0;   /* blend alpha just like colors */
  576.     cc_state->cc3.blend_enable = 1;     /* enable color blend */
  577.     cc_state->cc3.alpha_test = 0;       /* disable alpha test */
  578.     cc_state->cc3.alpha_test_format = 0;   /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
  579.     cc_state->cc3.alpha_test_func = 5;      /* COMPAREFUNCTION_LESS: pass if less than the reference */
  580.     cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;
  581.  
  582.     cc_state->cc5.dither_enable = 0;    /* disable dither */
  583.     cc_state->cc5.logicop_func = 0xc;   /* WHITE */
  584.     cc_state->cc5.statistics_enable = 1;
  585.     cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
  586.     cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
  587.     cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
  588.  
  589.     cc_state->cc6.clamp_post_alpha_blend = 0;
  590.     cc_state->cc6.clamp_pre_alpha_blend = 0;
  591.    
  592.     /* final color = src_color * src_blend_factor +/- dst_color * dst_blend_factor */
  593.     cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
  594.     cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
  595.     cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
  596.    
  597.     /* alpha test reference */
  598.     cc_state->cc7.alpha_ref.f = 0.0;
  599.  
  600.  
  601.     dri_bo_emit_reloc(render_state->cc.state,
  602.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  603.                       0,
  604.                       offsetof(struct i965_cc_unit_state, cc4),
  605.                       render_state->cc.viewport);
  606.  
  607.     dri_bo_unmap(render_state->cc.state);
  608. }
  609.  
  610.  
  611. static void
  612. i965_render_cc_unit(VADriverContextP ctx)
  613. {
  614.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  615.     struct i965_render_state *render_state = &i965->render_state;
  616.     struct i965_cc_unit_state *cc_state;
  617.  
  618.     assert(render_state->cc.viewport);
  619.  
  620.     dri_bo_map(render_state->cc.state, 1);
  621.     assert(render_state->cc.state->virtual);
  622.     cc_state = render_state->cc.state->virtual;
  623.     memset(cc_state, 0, sizeof(*cc_state));
  624.  
  625.     cc_state->cc0.stencil_enable = 0;   /* disable stencil */
  626.     cc_state->cc2.depth_test = 0;       /* disable depth test */
  627.     cc_state->cc2.logicop_enable = 1;   /* enable logic op */
  628.     cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
  629.     cc_state->cc3.blend_enable = 0;     /* disable color blend */
  630.     cc_state->cc3.alpha_test = 0;       /* disable alpha test */
  631.     cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;
  632.  
  633.     cc_state->cc5.dither_enable = 0;    /* disable dither */
  634.     cc_state->cc5.logicop_func = 0xc;   /* WHITE */
  635.     cc_state->cc5.statistics_enable = 1;
  636.     cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
  637.     cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
  638.     cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;
  639.  
  640.     dri_bo_emit_reloc(render_state->cc.state,
  641.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  642.                       0,
  643.                       offsetof(struct i965_cc_unit_state, cc4),
  644.                       render_state->cc.viewport);
  645.  
  646.     dri_bo_unmap(render_state->cc.state);
  647. }
  648.  
  649. static void
  650. i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
  651. {
  652.     switch (tiling) {
  653.     case I915_TILING_NONE:
  654.         ss->ss3.tiled_surface = 0;
  655.         ss->ss3.tile_walk = 0;
  656.         break;
  657.     case I915_TILING_X:
  658.         ss->ss3.tiled_surface = 1;
  659.         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
  660.         break;
  661.     case I915_TILING_Y:
  662.         ss->ss3.tiled_surface = 1;
  663.         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
  664.         break;
  665.     }
  666. }
  667.  
  668. static void
  669. i965_render_set_surface_state(
  670.     struct i965_surface_state *ss,
  671.     dri_bo                    *bo,
  672.     unsigned long              offset,
  673.     unsigned int               width,
  674.     unsigned int               height,
  675.     unsigned int               pitch,
  676.     unsigned int               format,
  677.     unsigned int               flags
  678. )
  679. {
  680.     unsigned int tiling;
  681.     unsigned int swizzle;
  682.  
  683.     memset(ss, 0, sizeof(*ss));
  684.  
  685.     switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
  686.     case I965_PP_FLAG_BOTTOM_FIELD:
  687.         ss->ss0.vert_line_stride_ofs = 1;
  688.         /* fall-through */
  689.     case I965_PP_FLAG_TOP_FIELD:
  690.         ss->ss0.vert_line_stride = 1;
  691.         height /= 2;
  692.         break;
  693.     }
  694.  
  695.     ss->ss0.surface_type = I965_SURFACE_2D;
  696.     ss->ss0.surface_format = format;
  697.     ss->ss0.color_blend = 1;
  698.  
  699.     ss->ss1.base_addr = bo->offset + offset;
  700.  
  701.     ss->ss2.width = width - 1;
  702.     ss->ss2.height = height - 1;
  703.  
  704.     ss->ss3.pitch = pitch - 1;
  705.  
  706.     dri_bo_get_tiling(bo, &tiling, &swizzle);
  707.     i965_render_set_surface_tiling(ss, tiling);
  708. }
  709.  
  710. static void
  711. gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
  712. {
  713.    switch (tiling) {
  714.    case I915_TILING_NONE:
  715.       ss->ss0.tiled_surface = 0;
  716.       ss->ss0.tile_walk = 0;
  717.       break;
  718.    case I915_TILING_X:
  719.       ss->ss0.tiled_surface = 1;
  720.       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
  721.       break;
  722.    case I915_TILING_Y:
  723.       ss->ss0.tiled_surface = 1;
  724.       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
  725.       break;
  726.    }
  727. }
  728.  
  729. /* Set "Shader Channel Select" */
  730. void
  731. gen7_render_set_surface_scs(struct gen7_surface_state *ss)
  732. {
  733.     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
  734.     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
  735.     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
  736.     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
  737. }
  738.  
  739. static void
  740. gen7_render_set_surface_state(
  741.     struct gen7_surface_state *ss,
  742.     dri_bo                    *bo,
  743.     unsigned long              offset,
  744.     int                        width,
  745.     int                        height,
  746.     int                        pitch,
  747.     int                        format,
  748.     unsigned int               flags
  749. )
  750. {
  751.     unsigned int tiling;
  752.     unsigned int swizzle;
  753.  
  754.     memset(ss, 0, sizeof(*ss));
  755.  
  756.     switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
  757.     case I965_PP_FLAG_BOTTOM_FIELD:
  758.         ss->ss0.vert_line_stride_ofs = 1;
  759.         /* fall-through */
  760.     case I965_PP_FLAG_TOP_FIELD:
  761.         ss->ss0.vert_line_stride = 1;
  762.         height /= 2;
  763.         break;
  764.     }
  765.  
  766.     ss->ss0.surface_type = I965_SURFACE_2D;
  767.     ss->ss0.surface_format = format;
  768.  
  769.     ss->ss1.base_addr = bo->offset + offset;
  770.  
  771.     ss->ss2.width = width - 1;
  772.     ss->ss2.height = height - 1;
  773.  
  774.     ss->ss3.pitch = pitch - 1;
  775.  
  776.     dri_bo_get_tiling(bo, &tiling, &swizzle);
  777.     gen7_render_set_surface_tiling(ss, tiling);
  778. }
  779.  
  780. static void
  781. i965_render_src_surface_state(
  782.     VADriverContextP ctx,
  783.     int              index,
  784.     dri_bo          *region,
  785.     unsigned long    offset,
  786.     int              w,
  787.     int              h,
  788.     int              pitch,
  789.     int              format,
  790.     unsigned int     flags
  791. )
  792. {
  793.     struct i965_driver_data *i965 = i965_driver_data(ctx);  
  794.     struct i965_render_state *render_state = &i965->render_state;
  795.     void *ss;
  796.     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
  797.  
  798.     assert(index < MAX_RENDER_SURFACES);
  799.  
  800.     dri_bo_map(ss_bo, 1);
  801.     assert(ss_bo->virtual);
  802.     ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
  803.  
  804.     if (IS_GEN7(i965->intel.device_id)) {
  805.         gen7_render_set_surface_state(ss,
  806.                                       region, offset,
  807.                                       w, h,
  808.                                       pitch, format, flags);
  809.         if (IS_HASWELL(i965->intel.device_id))
  810.             gen7_render_set_surface_scs(ss);
  811.         dri_bo_emit_reloc(ss_bo,
  812.                           I915_GEM_DOMAIN_SAMPLER, 0,
  813.                           offset,
  814.                           SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
  815.                           region);
  816.     } else {
  817.         i965_render_set_surface_state(ss,
  818.                                       region, offset,
  819.                                       w, h,
  820.                                       pitch, format, flags);
  821.         dri_bo_emit_reloc(ss_bo,
  822.                           I915_GEM_DOMAIN_SAMPLER, 0,
  823.                           offset,
  824.                           SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
  825.                           region);
  826.     }
  827.  
  828.     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
  829.     dri_bo_unmap(ss_bo);
  830.     render_state->wm.sampler_count++;
  831. }
  832.  
  833. static void
  834. i965_render_src_surfaces_state(
  835.     VADriverContextP ctx,
  836.     VASurfaceID      surface,
  837.     unsigned int     flags
  838. )
  839. {
  840.     struct i965_driver_data *i965 = i965_driver_data(ctx);  
  841.     struct object_surface *obj_surface;
  842.     int region_pitch;
  843.     int rw, rh;
  844.     dri_bo *region;
  845.  
  846.     obj_surface = SURFACE(surface);
  847.     assert(obj_surface);
  848.  
  849.     region_pitch = obj_surface->width;
  850.     rw = obj_surface->orig_width;
  851.     rh = obj_surface->orig_height;
  852.     region = obj_surface->bo;
  853.  
  854.     i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
  855.     i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
  856.  
  857.     if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
  858.         i965_render_src_surface_state(ctx, 3, region,
  859.                                       region_pitch * obj_surface->y_cb_offset,
  860.                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
  861.                                       I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
  862.         i965_render_src_surface_state(ctx, 4, region,
  863.                                       region_pitch * obj_surface->y_cb_offset,
  864.                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
  865.                                       I965_SURFACEFORMAT_R8G8_UNORM, flags);
  866.     } else {
  867.         i965_render_src_surface_state(ctx, 3, region,
  868.                                       region_pitch * obj_surface->y_cb_offset,
  869.                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
  870.                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
  871.         i965_render_src_surface_state(ctx, 4, region,
  872.                                       region_pitch * obj_surface->y_cb_offset,
  873.                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
  874.                                       I965_SURFACEFORMAT_R8_UNORM, flags);
  875.         i965_render_src_surface_state(ctx, 5, region,
  876.                                       region_pitch * obj_surface->y_cr_offset,
  877.                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
  878.                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
  879.         i965_render_src_surface_state(ctx, 6, region,
  880.                                       region_pitch * obj_surface->y_cr_offset,
  881.                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
  882.                                       I965_SURFACEFORMAT_R8_UNORM, flags);
  883.     }
  884. }
  885.  
  886. static void
  887. i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
  888.                               VASurfaceID surface)
  889. {
  890.     struct i965_driver_data *i965 = i965_driver_data(ctx);  
  891.     struct object_surface *obj_surface = SURFACE(surface);
  892.     dri_bo *subpic_region;
  893.     unsigned int index = obj_surface->subpic_render_idx;
  894.     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic[index]);
  895.     struct object_image *obj_image = IMAGE(obj_subpic->image);
  896.     assert(obj_surface);
  897.     assert(obj_surface->bo);
  898.     subpic_region = obj_image->bo;
  899.     /*subpicture surface*/
  900.     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);    
  901.     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);    
  902. }
  903.  
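/* Surface state for the render target: the format is chosen from the
 * drawable's bytes per pixel and binding table entry 'index' is pointed
 * at it. */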
  904. static void
  905. i965_render_dest_surface_state(VADriverContextP ctx, int index)
  906. {
  907.     struct i965_driver_data *i965 = i965_driver_data(ctx);  
  908.     struct i965_render_state *render_state = &i965->render_state;
  909.     struct intel_region *dest_region = render_state->draw_region;
  910.     void *ss;
  911.     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
  912.     int format;
  913.     assert(index < MAX_RENDER_SURFACES);
  914.  
  915.     if (dest_region->cpp == 2) {
  916.         format = I965_SURFACEFORMAT_B5G6R5_UNORM;
  917.     } else {
  918.         format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
  919.     }
  920.  
  921.     dri_bo_map(ss_bo, 1);
  922.     assert(ss_bo->virtual);
  923.     ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
  924.  
  925.     if (IS_GEN7(i965->intel.device_id)) {
  926.         gen7_render_set_surface_state(ss,
  927.                                       dest_region->bo, 0,
  928.                                       dest_region->width, dest_region->height,
  929.                                       dest_region->pitch, format, 0);
  930.         if (IS_HASWELL(i965->intel.device_id))
  931.             gen7_render_set_surface_scs(ss);
  932.         dri_bo_emit_reloc(ss_bo,
  933.                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  934.                           0,
  935.                           SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
  936.                           dest_region->bo);
  937.     } else {
  938.         i965_render_set_surface_state(ss,
  939.                                       dest_region->bo, 0,
  940.                                       dest_region->width, dest_region->height,
  941.                                       dest_region->pitch, format, 0);
  942.         dri_bo_emit_reloc(ss_bo,
  943.                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  944.                           0,
  945.                           SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
  946.                           dest_region->bo);
  947.     }
  948.  
  949.     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
  950.     dri_bo_unmap(ss_bo);
  951. }
  952.  
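/* Upload three (texcoord, position) vertex pairs for the destination
 * rectangle; the texture coordinates are permuted according to the current
 * rotation attribute. */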
  953. static void
  954. i965_fill_vertex_buffer(
  955.     VADriverContextP ctx,
  956.     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
  957.     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
  958. )
  959. {
  960.     struct i965_driver_data * const i965 = i965_driver_data(ctx);
  961.     float vb[12];
  962.  
  963.     enum { X1, Y1, X2, Y2 };
  964.  
  965.     static const unsigned int g_rotation_indices[][6] = {
  966.         [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
  967.         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
  968.         [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
  969.         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
  970.     };
  971.  
  972.     const unsigned int * const rotation_indices =
  973.         g_rotation_indices[i965->rotation_attrib->value];
  974.  
  975.     vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
  976.     vb[1]  = tex_coords[rotation_indices[1]];
  977.     vb[2]  = vid_coords[X2];
  978.     vb[3]  = vid_coords[Y2];
  979.  
  980.     vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
  981.     vb[5]  = tex_coords[rotation_indices[3]];
  982.     vb[6]  = vid_coords[X1];
  983.     vb[7]  = vid_coords[Y2];
  984.  
  985.     vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
  986.     vb[9]  = tex_coords[rotation_indices[5]];
  987.     vb[10] = vid_coords[X1];
  988.     vb[11] = vid_coords[Y1];
  989.  
  990.     dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
  991. }
  992.  
  993. static void
  994. i965_subpic_render_upload_vertex(VADriverContextP ctx,
  995.                                  VASurfaceID surface,
  996.                                  const VARectangle *output_rect)
  997. {    
  998.     struct i965_driver_data  *i965         = i965_driver_data(ctx);
  999.     struct object_surface    *obj_surface  = SURFACE(surface);
  1000.     unsigned int index = obj_surface->subpic_render_idx;
  1001.     struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic[index]);
  1002.     float tex_coords[4], vid_coords[4];
  1003.     VARectangle dst_rect;
  1004.  
  1005.     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
  1006.         dst_rect = obj_subpic->dst_rect;
  1007.     else {
  1008.         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
  1009.         const float sy  = (float)output_rect->height / obj_surface->orig_height;
  1010.         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
  1011.         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
  1012.         dst_rect.width  = sx * obj_subpic->dst_rect.width;
  1013.         dst_rect.height = sy * obj_subpic->dst_rect.height;
  1014.     }
  1015.  
  1016.     tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
  1017.     tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
  1018.     tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
  1019.     tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
  1020.  
  1021.     vid_coords[0] = dst_rect.x;
  1022.     vid_coords[1] = dst_rect.y;
  1023.     vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
  1024.     vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
  1025.  
  1026.     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
  1027. }
  1028.  
  1029. static void
  1030. i965_render_upload_vertex(
  1031.     VADriverContextP   ctx,
  1032.     VASurfaceID        surface,
  1033.     const VARectangle *src_rect,
  1034.     const VARectangle *dst_rect
  1035. )
  1036. {
  1037.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1038.     struct i965_render_state *render_state = &i965->render_state;
  1039.     struct intel_region *dest_region = render_state->draw_region;
  1040.     struct object_surface *obj_surface;
  1041.     float tex_coords[4], vid_coords[4];
  1042.     int width, height;
  1043.  
  1044.     obj_surface = SURFACE(surface);
  1045.     assert(obj_surface);
  1046.  
  1047.     width  = obj_surface->orig_width;
  1048.     height = obj_surface->orig_height;
  1049.  
  1050.     tex_coords[0] = (float)src_rect->x / width;
  1051.     tex_coords[1] = (float)src_rect->y / height;
  1052.     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
  1053.     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
  1054.  
  1055.     vid_coords[0] = dest_region->x + dst_rect->x;
  1056.     vid_coords[1] = dest_region->y + dst_rect->y;
  1057.     vid_coords[2] = vid_coords[0] + dst_rect->width;
  1058.     vid_coords[3] = vid_coords[1] + dst_rect->height;
  1059.  
  1060.     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
  1061. }
  1062.  
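/* Upload a single format-selector constant for the pixel shader:
 * 2 for monochrome (YUV400), 1 for NV12 and 0 otherwise. */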
  1063. static void
  1064. i965_render_upload_constants(VADriverContextP ctx,
  1065.                              VASurfaceID surface)
  1066. {
  1067.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1068.     struct i965_render_state *render_state = &i965->render_state;
  1069.     unsigned short *constant_buffer;
  1070.     struct object_surface *obj_surface = SURFACE(surface);
  1071.  
  1072.     dri_bo_map(render_state->curbe.bo, 1);
  1073.     assert(render_state->curbe.bo->virtual);
  1074.     constant_buffer = render_state->curbe.bo->virtual;
  1075.  
  1076.     if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
  1077.         assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
  1078.                obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3'));
  1079.         *constant_buffer = 2;
  1080.     } else {
  1081.         if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
  1082.             *constant_buffer = 1;
  1083.         else
  1084.             *constant_buffer = 0;
  1085.     }
  1086.  
  1087.     dri_bo_unmap(render_state->curbe.bo);
  1088. }
  1089.  
  1090. static void
  1091. i965_subpic_render_upload_constants(VADriverContextP ctx,
  1092.                              VASurfaceID surface)
  1093. {
  1094.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1095.     struct i965_render_state *render_state = &i965->render_state;
  1096.     float *constant_buffer;
  1097.     float global_alpha = 1.0;
  1098.     struct object_surface *obj_surface = SURFACE(surface);
  1099.     unsigned int index = obj_surface->subpic_render_idx;
  1100.  
  1101.     if (obj_surface->subpic[index] != VA_INVALID_ID) {
  1102.         struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic[index]);
  1103.         if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
  1104.             global_alpha = obj_subpic->global_alpha;
  1105.         }
  1106.     }
  1107.  
  1108.     dri_bo_map(render_state->curbe.bo, 1);
  1109.  
  1110.     assert(render_state->curbe.bo->virtual);
  1111.     constant_buffer = render_state->curbe.bo->virtual;
  1112.     *constant_buffer = global_alpha;
  1113.  
  1114.     dri_bo_unmap(render_state->curbe.bo);
  1115. }
  1116.  
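/* Build all indirect state (unit states, surface states, samplers, CURBE
 * constants and the vertex buffer) needed to composite a video surface. */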
  1117. static void
  1118. i965_surface_render_state_setup(
  1119.     VADriverContextP   ctx,
  1120.     VASurfaceID        surface,
  1121.     const VARectangle *src_rect,
  1122.     const VARectangle *dst_rect,
  1123.     unsigned int       flags
  1124. )
  1125. {
  1126.     i965_render_vs_unit(ctx);
  1127.     i965_render_sf_unit(ctx);
  1128.     i965_render_dest_surface_state(ctx, 0);
  1129.     i965_render_src_surfaces_state(ctx, surface, flags);
  1130.     i965_render_sampler(ctx);
  1131.     i965_render_wm_unit(ctx);
  1132.     i965_render_cc_viewport(ctx);
  1133.     i965_render_cc_unit(ctx);
  1134.     i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
  1135.     i965_render_upload_constants(ctx, surface);
  1136. }
  1137.  
  1138. static void
  1139. i965_subpic_render_state_setup(
  1140.     VADriverContextP   ctx,
  1141.     VASurfaceID        surface,
  1142.     const VARectangle *src_rect,
  1143.     const VARectangle *dst_rect
  1144. )
  1145. {
  1146.     i965_render_vs_unit(ctx);
  1147.     i965_render_sf_unit(ctx);
  1148.     i965_render_dest_surface_state(ctx, 0);
  1149.     i965_subpic_render_src_surfaces_state(ctx, surface);
  1150.     i965_render_sampler(ctx);
  1151.     i965_subpic_render_wm_unit(ctx);
  1152.     i965_render_cc_viewport(ctx);
  1153.     i965_subpic_render_cc_unit(ctx);
  1154.     i965_subpic_render_upload_constants(ctx, surface);
  1155.     i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
  1156. }
  1157.  
  1158.  
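/* The functions below emit commands into the batch buffer; the functions
 * above only build the indirect state those commands reference. */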
  1159. static void
  1160. i965_render_pipeline_select(VADriverContextP ctx)
  1161. {
  1162.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1163.     struct intel_batchbuffer *batch = i965->batch;
  1164.  
  1165.     BEGIN_BATCH(batch, 1);
  1166.     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
  1167.     ADVANCE_BATCH(batch);
  1168. }
  1169.  
  1170. static void
  1171. i965_render_state_sip(VADriverContextP ctx)
  1172. {
  1173.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1174.     struct intel_batchbuffer *batch = i965->batch;
  1175.  
  1176.     BEGIN_BATCH(batch, 2);
  1177.     OUT_BATCH(batch, CMD_STATE_SIP | 0);
  1178.     OUT_BATCH(batch, 0);
  1179.     ADVANCE_BATCH(batch);
  1180. }
  1181.  
  1182. static void
  1183. i965_render_state_base_address(VADriverContextP ctx)
  1184. {
  1185.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1186.     struct intel_batchbuffer *batch = i965->batch;
  1187.     struct i965_render_state *render_state = &i965->render_state;
  1188.  
  1189.     if (IS_IRONLAKE(i965->intel.device_id)) {
  1190.         BEGIN_BATCH(batch, 8);
  1191.         OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
  1192.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1193.         OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
  1194.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1195.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1196.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1197.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1198.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1199.         ADVANCE_BATCH(batch);
  1200.     } else {
  1201.         BEGIN_BATCH(batch, 6);
  1202.         OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
  1203.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1204.         OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
  1205.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1206.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1207.         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
  1208.         ADVANCE_BATCH(batch);
  1209.     }
  1210. }
  1211.  
  1212. static void
  1213. i965_render_binding_table_pointers(VADriverContextP ctx)
  1214. {
  1215.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1216.     struct intel_batchbuffer *batch = i965->batch;
  1217.  
  1218.     BEGIN_BATCH(batch, 6);
  1219.     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
  1220.     OUT_BATCH(batch, 0); /* vs */
  1221.     OUT_BATCH(batch, 0); /* gs */
  1222.     OUT_BATCH(batch, 0); /* clip */
  1223.     OUT_BATCH(batch, 0); /* sf */
  1224.     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
  1225.     ADVANCE_BATCH(batch);
  1226. }
  1227.  
  1228. static void
  1229. i965_render_constant_color(VADriverContextP ctx)
  1230. {
  1231.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1232.     struct intel_batchbuffer *batch = i965->batch;
  1233.  
  1234.     BEGIN_BATCH(batch, 5);
  1235.     OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
  1236.     OUT_BATCH(batch, float_to_uint(1.0));
  1237.     OUT_BATCH(batch, float_to_uint(0.0));
  1238.     OUT_BATCH(batch, float_to_uint(1.0));
  1239.     OUT_BATCH(batch, float_to_uint(1.0));
  1240.     ADVANCE_BATCH(batch);
  1241. }
  1242.  
  1243. static void
  1244. i965_render_pipelined_pointers(VADriverContextP ctx)
  1245. {
  1246.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1247.     struct intel_batchbuffer *batch = i965->batch;
  1248.     struct i965_render_state *render_state = &i965->render_state;
  1249.  
  1250.     BEGIN_BATCH(batch, 7);
  1251.     OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
  1252.     OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1253.     OUT_BATCH(batch, 0);  /* disable GS */
  1254.     OUT_BATCH(batch, 0);  /* disable CLIP */
  1255.     OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1256.     OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1257.     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1258.     ADVANCE_BATCH(batch);
  1259. }
  1260.  
  1261. static void
  1262. i965_render_urb_layout(VADriverContextP ctx)
  1263. {
  1264.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1265.     struct intel_batchbuffer *batch = i965->batch;
  1266.     int urb_vs_start, urb_vs_size;
  1267.     int urb_gs_start, urb_gs_size;
  1268.     int urb_clip_start, urb_clip_size;
  1269.     int urb_sf_start, urb_sf_size;
  1270.     int urb_cs_start, urb_cs_size;
  1271.  
  1272.     urb_vs_start = 0;
  1273.     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
  1274.     urb_gs_start = urb_vs_start + urb_vs_size;
  1275.     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
  1276.     urb_clip_start = urb_gs_start + urb_gs_size;
  1277.     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
  1278.     urb_sf_start = urb_clip_start + urb_clip_size;
  1279.     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
  1280.     urb_cs_start = urb_sf_start + urb_sf_size;
  1281.     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
  1282.  
  1283.     BEGIN_BATCH(batch, 3);
  1284.     OUT_BATCH(batch,
  1285.               CMD_URB_FENCE |
  1286.               UF0_CS_REALLOC |
  1287.               UF0_SF_REALLOC |
  1288.               UF0_CLIP_REALLOC |
  1289.               UF0_GS_REALLOC |
  1290.               UF0_VS_REALLOC |
  1291.               1);
  1292.     OUT_BATCH(batch,
  1293.               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
  1294.               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
  1295.               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
  1296.     OUT_BATCH(batch,
  1297.               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
  1298.               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
  1299.     ADVANCE_BATCH(batch);
  1300. }
  1301.  
  1302. static void
  1303. i965_render_cs_urb_layout(VADriverContextP ctx)
  1304. {
  1305.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1306.     struct intel_batchbuffer *batch = i965->batch;
  1307.  
  1308.     BEGIN_BATCH(batch, 2);
  1309.     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
  1310.     OUT_BATCH(batch,
  1311.               ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
  1312.               (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
  1313.     ADVANCE_BATCH(batch);
  1314. }
  1315.  
  1316. static void
  1317. i965_render_constant_buffer(VADriverContextP ctx)
  1318. {
  1319.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1320.     struct intel_batchbuffer *batch = i965->batch;
  1321.     struct i965_render_state *render_state = &i965->render_state;
  1322.  
  1323.     BEGIN_BATCH(batch, 2);
  1324.     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
  1325.     OUT_RELOC(batch, render_state->curbe.bo,
  1326.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  1327.               URB_CS_ENTRY_SIZE - 1);
  1328.     ADVANCE_BATCH(batch);    
  1329. }
  1330.  
  1331. static void
  1332. i965_render_drawing_rectangle(VADriverContextP ctx)
  1333. {
  1334.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1335.     struct intel_batchbuffer *batch = i965->batch;
  1336.     struct i965_render_state *render_state = &i965->render_state;
  1337.     struct intel_region *dest_region = render_state->draw_region;
  1338.  
  1339.     BEGIN_BATCH(batch, 4);
  1340.     OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
  1341.     OUT_BATCH(batch, 0x00000000);
  1342.     OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
  1343.     OUT_BATCH(batch, 0x00000000);        
  1344.     ADVANCE_BATCH(batch);
  1345. }
  1346.  
  1347. static void
  1348. i965_render_vertex_elements(VADriverContextP ctx)
  1349. {
  1350.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1351.     struct intel_batchbuffer *batch = i965->batch;
  1352.  
  1353.     if (IS_IRONLAKE(i965->intel.device_id)) {
  1354.         BEGIN_BATCH(batch, 5);
  1355.         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
  1356.         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  1357.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1358.                   VE0_VALID |
  1359.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1360.                   (0 << VE0_OFFSET_SHIFT));
  1361.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1362.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1363.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1364.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  1365.         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  1366.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1367.                   VE0_VALID |
  1368.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1369.                   (8 << VE0_OFFSET_SHIFT));
  1370.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1371.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1372.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1373.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  1374.         ADVANCE_BATCH(batch);
  1375.     } else {
  1376.         BEGIN_BATCH(batch, 5);
  1377.         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
  1378.         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  1379.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1380.                   VE0_VALID |
  1381.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1382.                   (0 << VE0_OFFSET_SHIFT));
  1383.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1384.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1385.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1386.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
  1387.                   (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
  1388.         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  1389.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1390.                   VE0_VALID |
  1391.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1392.                   (8 << VE0_OFFSET_SHIFT));
  1393.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1394.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1395.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1396.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
  1397.                   (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
  1398.         ADVANCE_BATCH(batch);
  1399.     }
  1400. }
  1401.  
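        /*
         * Upload the subpicture palette with CMD_SAMPLER_PALETTE_LOAD.  Each
         * entry combines the caller's alpha in bits 24-31 with the 24-bit
         * color from obj_image->palette[i]; e.g. alpha 0xff and color
         * 0x00336699 produce the dword 0xff336699.
         */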
  1402. static void
  1403. i965_render_upload_image_palette(
  1404.     VADriverContextP ctx,
  1405.     VAImageID        image_id,
  1406.     unsigned int     alpha
  1407. )
  1408. {
  1409.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1410.     struct intel_batchbuffer *batch = i965->batch;
  1411.     unsigned int i;
  1412.  
  1413.     struct object_image *obj_image = IMAGE(image_id);
  1414.     assert(obj_image);
  1415.  
  1416.     if (obj_image->image.num_palette_entries == 0)
  1417.         return;
  1418.  
  1419.     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
  1420.     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
  1421.     /* fill the palette */
  1422.     /* each dword packs the color in bits 0-23 and the alpha in bits 24-31 */
  1423.     for (i = 0; i < obj_image->image.num_palette_entries; i++)
  1424.         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
  1425.     ADVANCE_BATCH(batch);
  1426. }
  1427.  
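        /*
         * Bind the vertex buffer (pitch 4 * 4 bytes, one {x, y, s, t} float4
         * per vertex) and kick off a 3DPRIMITIVE RECTLIST with three vertices
         * and a single instance.  Ironlake takes an end-address relocation for
         * the buffer; older parts take a max-index value of 3 instead.
         */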
  1428. static void
  1429. i965_render_startup(VADriverContextP ctx)
  1430. {
  1431.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1432.     struct intel_batchbuffer *batch = i965->batch;
  1433.     struct i965_render_state *render_state = &i965->render_state;
  1434.  
  1435.     BEGIN_BATCH(batch, 11);
  1436.     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
  1437.     OUT_BATCH(batch,
  1438.               (0 << VB0_BUFFER_INDEX_SHIFT) |
  1439.               VB0_VERTEXDATA |
  1440.               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
  1441.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
  1442.  
  1443.     if (IS_IRONLAKE(i965->intel.device_id))
  1444.         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
  1445.     else
  1446.         OUT_BATCH(batch, 3);
  1447.  
  1448.     OUT_BATCH(batch, 0);
  1449.  
  1450.     OUT_BATCH(batch,
  1451.               CMD_3DPRIMITIVE |
  1452.               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
  1453.               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
  1454.               (0 << 9) |
  1455.               4);
  1456.     OUT_BATCH(batch, 3); /* vertex count per instance */
  1457.     OUT_BATCH(batch, 0); /* start vertex offset */
  1458.     OUT_BATCH(batch, 1); /* single instance */
  1459.     OUT_BATCH(batch, 0); /* start instance location */
  1460.     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
  1461.     ADVANCE_BATCH(batch);
  1462. }
  1463.  
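        /*
         * Clear the destination region with a solid-fill XY_COLOR_BLT
         * (raster op 0xf0 in BR13).  The cpp selects the 8888 or 565 variant,
         * tiled destinations get XY_COLOR_BLT_DST_TILED with the pitch given
         * in dwords, and on GEN6/GEN7 the blit is emitted on the BLT ring.
         */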
  1464. static void
  1465. i965_clear_dest_region(VADriverContextP ctx)
  1466. {
  1467.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1468.     struct intel_batchbuffer *batch = i965->batch;
  1469.     struct i965_render_state *render_state = &i965->render_state;
  1470.     struct intel_region *dest_region = render_state->draw_region;
  1471.     unsigned int blt_cmd, br13;
  1472.     int pitch;
  1473.  
  1474.     blt_cmd = XY_COLOR_BLT_CMD;
  1475.     br13 = 0xf0 << 16;
  1476.     pitch = dest_region->pitch;
  1477.  
  1478.     if (dest_region->cpp == 4) {
  1479.         br13 |= BR13_8888;
  1480.         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
  1481.     } else {
  1482.         assert(dest_region->cpp == 2);
  1483.         br13 |= BR13_565;
  1484.     }
  1485.  
  1486.     if (dest_region->tiling != I915_TILING_NONE) {
  1487.         blt_cmd |= XY_COLOR_BLT_DST_TILED;
  1488.         pitch /= 4;
  1489.     }
  1490.  
  1491.     br13 |= pitch;
  1492.  
  1493.     if (IS_GEN6(i965->intel.device_id) ||
  1494.         IS_GEN7(i965->intel.device_id)) {
  1495.         intel_batchbuffer_start_atomic_blt(batch, 24);
  1496.         BEGIN_BLT_BATCH(batch, 6);
  1497.     } else {
  1498.         intel_batchbuffer_start_atomic(batch, 24);
  1499.         BEGIN_BATCH(batch, 6);
  1500.     }
  1501.  
  1502.     OUT_BATCH(batch, blt_cmd);
  1503.     OUT_BATCH(batch, br13);
  1504.     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
  1505.     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
  1506.               (dest_region->x + dest_region->width));
  1507.     OUT_RELOC(batch, dest_region->bo,
  1508.               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  1509.               0);
  1510.     OUT_BATCH(batch, 0x0);
  1511.     ADVANCE_BATCH(batch);
  1512.     intel_batchbuffer_end_atomic(batch);
  1513. }
  1514.  
  1515. static void
  1516. i965_surface_render_pipeline_setup(VADriverContextP ctx)
  1517. {
  1518.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1519.     struct intel_batchbuffer *batch = i965->batch;
  1520.  
  1521.     i965_clear_dest_region(ctx);
  1522.     intel_batchbuffer_start_atomic(batch, 0x1000);
  1523.     intel_batchbuffer_emit_mi_flush(batch);
  1524.     i965_render_pipeline_select(ctx);
  1525.     i965_render_state_sip(ctx);
  1526.     i965_render_state_base_address(ctx);
  1527.     i965_render_binding_table_pointers(ctx);
  1528.     i965_render_constant_color(ctx);
  1529.     i965_render_pipelined_pointers(ctx);
  1530.     i965_render_urb_layout(ctx);
  1531.     i965_render_cs_urb_layout(ctx);
  1532.     i965_render_constant_buffer(ctx);
  1533.     i965_render_drawing_rectangle(ctx);
  1534.     i965_render_vertex_elements(ctx);
  1535.     i965_render_startup(ctx);
  1536.     intel_batchbuffer_end_atomic(batch);
  1537. }
  1538.  
  1539. static void
  1540. i965_subpic_render_pipeline_setup(VADriverContextP ctx)
  1541. {
  1542.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1543.     struct intel_batchbuffer *batch = i965->batch;
  1544.  
  1545.     intel_batchbuffer_start_atomic(batch, 0x1000);
  1546.     intel_batchbuffer_emit_mi_flush(batch);
  1547.     i965_render_pipeline_select(ctx);
  1548.     i965_render_state_sip(ctx);
  1549.     i965_render_state_base_address(ctx);
  1550.     i965_render_binding_table_pointers(ctx);
  1551.     i965_render_constant_color(ctx);
  1552.     i965_render_pipelined_pointers(ctx);
  1553.     i965_render_urb_layout(ctx);
  1554.     i965_render_cs_urb_layout(ctx);
  1555.     i965_render_drawing_rectangle(ctx);
  1556.     i965_render_vertex_elements(ctx);
  1557.     i965_render_startup(ctx);
  1558.     intel_batchbuffer_end_atomic(batch);
  1559. }
  1560.  
  1561.  
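        /*
         * Allocate (or re-allocate) the buffer objects used by the pre-GEN6
         * render path: vertex buffer, VS/SF/WM/CC unit state, sampler state,
         * the combined surface-state/binding-table buffer and the CC viewport.
         */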
  1562. static void
  1563. i965_render_initialize(VADriverContextP ctx)
  1564. {
  1565.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1566.     struct i965_render_state *render_state = &i965->render_state;
  1567.     dri_bo *bo;
  1568.  
  1569.     /* VERTEX BUFFER */
  1570.     dri_bo_unreference(render_state->vb.vertex_buffer);
  1571.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1572.                       "vertex buffer",
  1573.                       4096,
  1574.                       4096);
  1575.     assert(bo);
  1576.     render_state->vb.vertex_buffer = bo;
  1577.  
  1578.     /* VS */
  1579.     dri_bo_unreference(render_state->vs.state);
  1580.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1581.                       "vs state",
  1582.                       sizeof(struct i965_vs_unit_state),
  1583.                       64);
  1584.     assert(bo);
  1585.     render_state->vs.state = bo;
  1586.  
  1587.     /* GS */
  1588.     /* CLIP */
  1589.     /* SF */
  1590.     dri_bo_unreference(render_state->sf.state);
  1591.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1592.                       "sf state",
  1593.                       sizeof(struct i965_sf_unit_state),
  1594.                       64);
  1595.     assert(bo);
  1596.     render_state->sf.state = bo;
  1597.  
  1598.     /* WM */
  1599.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  1600.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1601.                       "surface state & binding table",
  1602.                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
  1603.                       4096);
  1604.     assert(bo);
  1605.     render_state->wm.surface_state_binding_table_bo = bo;
  1606.  
  1607.     dri_bo_unreference(render_state->wm.sampler);
  1608.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1609.                       "sampler state",
  1610.                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
  1611.                       64);
  1612.     assert(bo);
  1613.     render_state->wm.sampler = bo;
  1614.     render_state->wm.sampler_count = 0;
  1615.  
  1616.     dri_bo_unreference(render_state->wm.state);
  1617.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1618.                       "wm state",
  1619.                       sizeof(struct i965_wm_unit_state),
  1620.                       64);
  1621.     assert(bo);
  1622.     render_state->wm.state = bo;
  1623.  
  1624.     /* COLOR CALCULATOR */
  1625.     dri_bo_unreference(render_state->cc.state);
  1626.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1627.                       "color calc state",
  1628.                       sizeof(struct i965_cc_unit_state),
  1629.                       64);
  1630.     assert(bo);
  1631.     render_state->cc.state = bo;
  1632.  
  1633.     dri_bo_unreference(render_state->cc.viewport);
  1634.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1635.                       "cc viewport",
  1636.                       sizeof(struct i965_cc_viewport),
  1637.                       64);
  1638.     assert(bo);
  1639.     render_state->cc.viewport = bo;
  1640. }
  1641.  
  1642. static void
  1643. i965_render_put_surface(
  1644.     VADriverContextP   ctx,
  1645.     VASurfaceID        surface,
  1646.     const VARectangle *src_rect,
  1647.     const VARectangle *dst_rect,
  1648.     unsigned int       flags
  1649. )
  1650. {
  1651.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1652.     struct intel_batchbuffer *batch = i965->batch;
  1653.  
  1654.     i965_render_initialize(ctx);
  1655.     i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect, flags);
  1656.     i965_surface_render_pipeline_setup(ctx);
  1657.     intel_batchbuffer_flush(batch);
  1658. }
  1659.  
  1660. static void
  1661. i965_render_put_subpicture(
  1662.     VADriverContextP   ctx,
  1663.     VASurfaceID        surface,
  1664.     const VARectangle *src_rect,
  1665.     const VARectangle *dst_rect
  1666. )
  1667. {
  1668.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1669.     struct intel_batchbuffer *batch = i965->batch;
  1670.     struct object_surface *obj_surface = SURFACE(surface);
  1671.     unsigned int index = obj_surface->subpic_render_idx;
  1672.     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic[index]);
  1673.  
  1674.     assert(obj_subpic);
  1675.  
  1676.     i965_render_initialize(ctx);
  1677.     i965_subpic_render_state_setup(ctx, surface, src_rect, dst_rect);
  1678.     i965_subpic_render_pipeline_setup(ctx);
  1679.     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
  1680.     intel_batchbuffer_flush(batch);
  1681. }
  1682.  
  1683. /*
  1684.  * for GEN6+
  1685.  */
  1686. static void
  1687. gen6_render_initialize(VADriverContextP ctx)
  1688. {
  1689.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1690.     struct i965_render_state *render_state = &i965->render_state;
  1691.     dri_bo *bo;
  1692.  
  1693.     /* VERTEX BUFFER */
  1694.     dri_bo_unreference(render_state->vb.vertex_buffer);
  1695.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1696.                       "vertex buffer",
  1697.                       4096,
  1698.                       4096);
  1699.     assert(bo);
  1700.     render_state->vb.vertex_buffer = bo;
  1701.  
  1702.     /* WM */
  1703.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  1704.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1705.                       "surface state & binding table",
  1706.                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
  1707.                       4096);
  1708.     assert(bo);
  1709.     render_state->wm.surface_state_binding_table_bo = bo;
  1710.  
  1711.     dri_bo_unreference(render_state->wm.sampler);
  1712.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1713.                       "sampler state",
  1714.                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
  1715.                       4096);
  1716.     assert(bo);
  1717.     render_state->wm.sampler = bo;
  1718.     render_state->wm.sampler_count = 0;
  1719.  
  1720.     /* COLOR CALCULATOR */
  1721.     dri_bo_unreference(render_state->cc.state);
  1722.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1723.                       "color calc state",
  1724.                       sizeof(struct gen6_color_calc_state),
  1725.                       4096);
  1726.     assert(bo);
  1727.     render_state->cc.state = bo;
  1728.  
  1729.     /* CC VIEWPORT */
  1730.     dri_bo_unreference(render_state->cc.viewport);
  1731.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1732.                       "cc viewport",
  1733.                       sizeof(struct i965_cc_viewport),
  1734.                       4096);
  1735.     assert(bo);
  1736.     render_state->cc.viewport = bo;
  1737.  
  1738.     /* BLEND STATE */
  1739.     dri_bo_unreference(render_state->cc.blend);
  1740.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1741.                       "blend state",
  1742.                       sizeof(struct gen6_blend_state),
  1743.                       4096);
  1744.     assert(bo);
  1745.     render_state->cc.blend = bo;
  1746.  
  1747.     /* DEPTH & STENCIL STATE */
  1748.     dri_bo_unreference(render_state->cc.depth_stencil);
  1749.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1750.                       "depth & stencil state",
  1751.                       sizeof(struct gen6_depth_stencil_state),
  1752.                       4096);
  1753.     assert(bo);
  1754.     render_state->cc.depth_stencil = bo;
  1755. }
  1756.  
  1757. static void
  1758. gen6_render_color_calc_state(VADriverContextP ctx)
  1759. {
  1760.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1761.     struct i965_render_state *render_state = &i965->render_state;
  1762.     struct gen6_color_calc_state *color_calc_state;
  1763.    
  1764.     dri_bo_map(render_state->cc.state, 1);
  1765.     assert(render_state->cc.state->virtual);
  1766.     color_calc_state = render_state->cc.state->virtual;
  1767.     memset(color_calc_state, 0, sizeof(*color_calc_state));
  1768.     color_calc_state->constant_r = 1.0;
  1769.     color_calc_state->constant_g = 0.0;
  1770.     color_calc_state->constant_b = 1.0;
  1771.     color_calc_state->constant_a = 1.0;
  1772.     dri_bo_unmap(render_state->cc.state);
  1773. }
  1774.  
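        /*
         * Blend state for plain surface rendering on GEN6: blending stays
         * disabled and the logic op is set to 0xc (COPY), so the source color
         * is written to the destination unchanged.
         */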
  1775. static void
  1776. gen6_render_blend_state(VADriverContextP ctx)
  1777. {
  1778.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1779.     struct i965_render_state *render_state = &i965->render_state;
  1780.     struct gen6_blend_state *blend_state;
  1781.    
  1782.     dri_bo_map(render_state->cc.blend, 1);
  1783.     assert(render_state->cc.blend->virtual);
  1784.     blend_state = render_state->cc.blend->virtual;
  1785.     memset(blend_state, 0, sizeof(*blend_state));
  1786.     blend_state->blend1.logic_op_enable = 1;
  1787.     blend_state->blend1.logic_op_func = 0xc;
  1788.     dri_bo_unmap(render_state->cc.blend);
  1789. }
  1790.  
  1791. static void
  1792. gen6_render_depth_stencil_state(VADriverContextP ctx)
  1793. {
  1794.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1795.     struct i965_render_state *render_state = &i965->render_state;
  1796.     struct gen6_depth_stencil_state *depth_stencil_state;
  1797.    
  1798.     dri_bo_map(render_state->cc.depth_stencil, 1);
  1799.     assert(render_state->cc.depth_stencil->virtual);
  1800.     depth_stencil_state = render_state->cc.depth_stencil->virtual;
  1801.     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
  1802.     dri_bo_unmap(render_state->cc.depth_stencil);
  1803. }
  1804.  
  1805. static void
  1806. gen6_render_setup_states(
  1807.     VADriverContextP   ctx,
  1808.     VASurfaceID        surface,
  1809.     const VARectangle *src_rect,
  1810.     const VARectangle *dst_rect,
  1811.     unsigned int       flags
  1812. )
  1813. {
  1814.     i965_render_dest_surface_state(ctx, 0);
  1815.     i965_render_src_surfaces_state(ctx, surface, flags);
  1816.     i965_render_sampler(ctx);
  1817.     i965_render_cc_viewport(ctx);
  1818.     gen6_render_color_calc_state(ctx);
  1819.     gen6_render_blend_state(ctx);
  1820.     gen6_render_depth_stencil_state(ctx);
  1821.     i965_render_upload_constants(ctx, surface);
  1822.     i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
  1823. }
  1824.  
  1825. static void
  1826. gen6_emit_invarient_states(VADriverContextP ctx)
  1827. {
  1828.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1829.     struct intel_batchbuffer *batch = i965->batch;
  1830.  
  1831.     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
  1832.  
  1833.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
  1834.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
  1835.               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
  1836.     OUT_BATCH(batch, 0);
  1837.  
  1838.     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
  1839.     OUT_BATCH(batch, 1);
  1840.  
  1841.     /* Set system instruction pointer */
  1842.     OUT_BATCH(batch, CMD_STATE_SIP | 0);
  1843.     OUT_BATCH(batch, 0);
  1844. }
  1845.  
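        /*
         * STATE_BASE_ADDRESS: only the surface-state base points at the
         * combined surface-state/binding-table buffer; every other base and
         * upper bound is left at zero with just the modify-enable bit set.
         */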
  1846. static void
  1847. gen6_emit_state_base_address(VADriverContextP ctx)
  1848. {
  1849.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1850.     struct intel_batchbuffer *batch = i965->batch;
  1851.     struct i965_render_state *render_state = &i965->render_state;
  1852.  
  1853.     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
  1854.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
  1855.     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
  1856.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
  1857.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
  1858.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
  1859.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
  1860.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
  1861.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
  1862.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
  1863. }
  1864.  
  1865. static void
  1866. gen6_emit_viewport_state_pointers(VADriverContextP ctx)
  1867. {
  1868.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1869.     struct intel_batchbuffer *batch = i965->batch;
  1870.     struct i965_render_state *render_state = &i965->render_state;
  1871.  
  1872.     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
  1873.               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
  1874.               (4 - 2));
  1875.     OUT_BATCH(batch, 0);
  1876.     OUT_BATCH(batch, 0);
  1877.     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1878. }
  1879.  
  1880. static void
  1881. gen6_emit_urb(VADriverContextP ctx)
  1882. {
  1883.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1884.     struct intel_batchbuffer *batch = i965->batch;
  1885.  
  1886.     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
  1887.     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
  1888.               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
  1889.     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
  1890.               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
  1891. }
  1892.  
  1893. static void
  1894. gen6_emit_cc_state_pointers(VADriverContextP ctx)
  1895. {
  1896.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1897.     struct intel_batchbuffer *batch = i965->batch;
  1898.     struct i965_render_state *render_state = &i965->render_state;
  1899.  
  1900.     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
  1901.     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
  1902.     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
  1903.     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
  1904. }
  1905.  
  1906. static void
  1907. gen6_emit_sampler_state_pointers(VADriverContextP ctx)
  1908. {
  1909.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1910.     struct intel_batchbuffer *batch = i965->batch;
  1911.     struct i965_render_state *render_state = &i965->render_state;
  1912.  
  1913.     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
  1914.               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
  1915.               (4 - 2));
  1916.     OUT_BATCH(batch, 0); /* VS */
  1917.     OUT_BATCH(batch, 0); /* GS */
  1918.     OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1919. }
  1920.  
  1921. static void
  1922. gen6_emit_binding_table(VADriverContextP ctx)
  1923. {
  1924.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1925.     struct intel_batchbuffer *batch = i965->batch;
  1926.  
  1927.     /* Binding table pointers */
  1928.     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
  1929.               GEN6_BINDING_TABLE_MODIFY_PS |
  1930.               (4 - 2));
  1931.     OUT_BATCH(batch, 0);                /* vs */
  1932.     OUT_BATCH(batch, 0);                /* gs */
  1933.     /* Only the PS uses the binding table */
  1934.     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
  1935. }
  1936.  
  1937. static void
  1938. gen6_emit_depth_buffer_state(VADriverContextP ctx)
  1939. {
  1940.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1941.     struct intel_batchbuffer *batch = i965->batch;
  1942.  
  1943.     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
  1944.     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
  1945.               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
  1946.     OUT_BATCH(batch, 0);
  1947.     OUT_BATCH(batch, 0);
  1948.     OUT_BATCH(batch, 0);
  1949.     OUT_BATCH(batch, 0);
  1950.     OUT_BATCH(batch, 0);
  1951.  
  1952.     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
  1953.     OUT_BATCH(batch, 0);
  1954. }
  1955.  
  1956. static void
  1957. gen6_emit_drawing_rectangle(VADriverContextP ctx)
  1958. {
  1959.     i965_render_drawing_rectangle(ctx);
  1960. }
  1961.  
  1962. static void
  1963. gen6_emit_vs_state(VADriverContextP ctx)
  1964. {
  1965.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1966.     struct intel_batchbuffer *batch = i965->batch;
  1967.  
  1968.     /* disable VS constant buffer */
  1969.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
  1970.     OUT_BATCH(batch, 0);
  1971.     OUT_BATCH(batch, 0);
  1972.     OUT_BATCH(batch, 0);
  1973.     OUT_BATCH(batch, 0);
  1974.        
  1975.     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
  1976.     OUT_BATCH(batch, 0); /* without VS kernel */
  1977.     OUT_BATCH(batch, 0);
  1978.     OUT_BATCH(batch, 0);
  1979.     OUT_BATCH(batch, 0);
  1980.     OUT_BATCH(batch, 0); /* pass-through */
  1981. }
  1982.  
  1983. static void
  1984. gen6_emit_gs_state(VADriverContextP ctx)
  1985. {
  1986.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1987.     struct intel_batchbuffer *batch = i965->batch;
  1988.  
  1989.     /* disable GS constant buffer */
  1990.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
  1991.     OUT_BATCH(batch, 0);
  1992.     OUT_BATCH(batch, 0);
  1993.     OUT_BATCH(batch, 0);
  1994.     OUT_BATCH(batch, 0);
  1995.        
  1996.     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
  1997.     OUT_BATCH(batch, 0); /* without GS kernel */
  1998.     OUT_BATCH(batch, 0);
  1999.     OUT_BATCH(batch, 0);
  2000.     OUT_BATCH(batch, 0);
  2001.     OUT_BATCH(batch, 0);
  2002.     OUT_BATCH(batch, 0); /* pass-through */
  2003. }
  2004.  
  2005. static void
  2006. gen6_emit_clip_state(VADriverContextP ctx)
  2007. {
  2008.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2009.     struct intel_batchbuffer *batch = i965->batch;
  2010.  
  2011.     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
  2012.     OUT_BATCH(batch, 0);
  2013.     OUT_BATCH(batch, 0); /* pass-through */
  2014.     OUT_BATCH(batch, 0);
  2015. }
  2016.  
  2017. static void
  2018. gen6_emit_sf_state(VADriverContextP ctx)
  2019. {
  2020.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2021.     struct intel_batchbuffer *batch = i965->batch;
  2022.  
  2023.     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
  2024.     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
  2025.               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
  2026.               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
  2027.     OUT_BATCH(batch, 0);
  2028.     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
  2029.     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
  2030.     OUT_BATCH(batch, 0);
  2031.     OUT_BATCH(batch, 0);
  2032.     OUT_BATCH(batch, 0);
  2033.     OUT_BATCH(batch, 0);
  2034.     OUT_BATCH(batch, 0); /* DW9 */
  2035.     OUT_BATCH(batch, 0);
  2036.     OUT_BATCH(batch, 0);
  2037.     OUT_BATCH(batch, 0);
  2038.     OUT_BATCH(batch, 0);
  2039.     OUT_BATCH(batch, 0); /* DW14 */
  2040.     OUT_BATCH(batch, 0);
  2041.     OUT_BATCH(batch, 0);
  2042.     OUT_BATCH(batch, 0);
  2043.     OUT_BATCH(batch, 0);
  2044.     OUT_BATCH(batch, 0); /* DW19 */
  2045. }
  2046.  
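        /*
         * WM setup on GEN6: push constants come from the CURBE buffer via
         * constant buffer 0 of 3DSTATE_CONSTANT_PS, and 3DSTATE_WM points at
         * the selected pixel-shader kernel with five binding-table entries,
         * dispatch starting at GRF 6, 16-pixel dispatch enabled and the
         * maximum thread count taken from render_state->max_wm_threads.
         */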
  2047. static void
  2048. gen6_emit_wm_state(VADriverContextP ctx, int kernel)
  2049. {
  2050.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2051.     struct intel_batchbuffer *batch = i965->batch;
  2052.     struct i965_render_state *render_state = &i965->render_state;
  2053.  
  2054.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
  2055.               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
  2056.               (5 - 2));
  2057.     OUT_RELOC(batch,
  2058.               render_state->curbe.bo,
  2059.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2060.               0);
  2061.     OUT_BATCH(batch, 0);
  2062.     OUT_BATCH(batch, 0);
  2063.     OUT_BATCH(batch, 0);
  2064.  
  2065.     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
  2066.     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
  2067.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2068.               0);
  2069.     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
  2070.               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
  2071.     OUT_BATCH(batch, 0);
  2072.     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
  2073.     OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
  2074.               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
  2075.               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
  2076.     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
  2077.               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
  2078.     OUT_BATCH(batch, 0);
  2079.     OUT_BATCH(batch, 0);
  2080. }
  2081.  
  2082. static void
  2083. gen6_emit_vertex_element_state(VADriverContextP ctx)
  2084. {
  2085.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2086.     struct intel_batchbuffer *batch = i965->batch;
  2087.  
  2088.     /* Set up our vertex elements, sourced from the single vertex buffer. */
  2089.     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
  2090.     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  2091.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2092.               GEN6_VE0_VALID |
  2093.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2094.               (0 << VE0_OFFSET_SHIFT));
  2095.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2096.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2097.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2098.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2099.     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  2100.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2101.               GEN6_VE0_VALID |
  2102.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2103.               (8 << VE0_OFFSET_SHIFT));
  2104.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2105.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2106.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2107.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2108. }
  2109.  
  2110. static void
  2111. gen6_emit_vertices(VADriverContextP ctx)
  2112. {
  2113.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2114.     struct intel_batchbuffer *batch = i965->batch;
  2115.     struct i965_render_state *render_state = &i965->render_state;
  2116.  
  2117.     BEGIN_BATCH(batch, 11);
  2118.     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
  2119.     OUT_BATCH(batch,
  2120.               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
  2121.               GEN6_VB0_VERTEXDATA |
  2122.               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
  2123.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
  2124.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
  2125.     OUT_BATCH(batch, 0);
  2126.  
  2127.     OUT_BATCH(batch,
  2128.               CMD_3DPRIMITIVE |
  2129.               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
  2130.               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
  2131.               (0 << 9) |
  2132.               4);
  2133.     OUT_BATCH(batch, 3); /* vertex count per instance */
  2134.     OUT_BATCH(batch, 0); /* start vertex offset */
  2135.     OUT_BATCH(batch, 1); /* single instance */
  2136.     OUT_BATCH(batch, 0); /* start instance location */
  2137.     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
  2138.     ADVANCE_BATCH(batch);
  2139. }
  2140.  
  2141. static void
  2142. gen6_render_emit_states(VADriverContextP ctx, int kernel)
  2143. {
  2144.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2145.     struct intel_batchbuffer *batch = i965->batch;
  2146.  
  2147.     intel_batchbuffer_start_atomic(batch, 0x1000);
  2148.     intel_batchbuffer_emit_mi_flush(batch);
  2149.     gen6_emit_invarient_states(ctx);
  2150.     gen6_emit_state_base_address(ctx);
  2151.     gen6_emit_viewport_state_pointers(ctx);
  2152.     gen6_emit_urb(ctx);
  2153.     gen6_emit_cc_state_pointers(ctx);
  2154.     gen6_emit_sampler_state_pointers(ctx);
  2155.     gen6_emit_vs_state(ctx);
  2156.     gen6_emit_gs_state(ctx);
  2157.     gen6_emit_clip_state(ctx);
  2158.     gen6_emit_sf_state(ctx);
  2159.     gen6_emit_wm_state(ctx, kernel);
  2160.     gen6_emit_binding_table(ctx);
  2161.     gen6_emit_depth_buffer_state(ctx);
  2162.     gen6_emit_drawing_rectangle(ctx);
  2163.     gen6_emit_vertex_element_state(ctx);
  2164.     gen6_emit_vertices(ctx);
  2165.     intel_batchbuffer_end_atomic(batch);
  2166. }
  2167.  
  2168. static void
  2169. gen6_render_put_surface(
  2170.     VADriverContextP   ctx,
  2171.     VASurfaceID        surface,
  2172.     const VARectangle *src_rect,
  2173.     const VARectangle *dst_rect,
  2174.     unsigned int       flags
  2175. )
  2176. {
  2177.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2178.     struct intel_batchbuffer *batch = i965->batch;
  2179.  
  2180.     gen6_render_initialize(ctx);
  2181.     gen6_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
  2182.     i965_clear_dest_region(ctx);
  2183.     gen6_render_emit_states(ctx, PS_KERNEL);
  2184.     intel_batchbuffer_flush(batch);
  2185. }
  2186.  
  2187. static void
  2188. gen6_subpicture_render_blend_state(VADriverContextP ctx)
  2189. {
  2190.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2191.     struct i965_render_state *render_state = &i965->render_state;
  2192.     struct gen6_blend_state *blend_state;
  2193.  
  2194.     dri_bo_unmap(render_state->cc.state);    
  2195.     dri_bo_map(render_state->cc.blend, 1);
  2196.     assert(render_state->cc.blend->virtual);
  2197.     blend_state = render_state->cc.blend->virtual;
  2198.     memset(blend_state, 0, sizeof(*blend_state));
  2199.     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
  2200.     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
  2201.     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
  2202.     blend_state->blend0.blend_enable = 1;
  2203.     blend_state->blend1.post_blend_clamp_enable = 1;
  2204.     blend_state->blend1.pre_blend_clamp_enable = 1;
  2205.     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
  2206.     dri_bo_unmap(render_state->cc.blend);
  2207. }
  2208.  
  2209. static void
  2210. gen6_subpicture_render_setup_states(
  2211.     VADriverContextP   ctx,
  2212.     VASurfaceID        surface,
  2213.     const VARectangle *src_rect,
  2214.     const VARectangle *dst_rect
  2215. )
  2216. {
  2217.     i965_render_dest_surface_state(ctx, 0);
  2218.     i965_subpic_render_src_surfaces_state(ctx, surface);
  2219.     i965_render_sampler(ctx);
  2220.     i965_render_cc_viewport(ctx);
  2221.     gen6_render_color_calc_state(ctx);
  2222.     gen6_subpicture_render_blend_state(ctx);
  2223.     gen6_render_depth_stencil_state(ctx);
  2224.     i965_subpic_render_upload_constants(ctx, surface);
  2225.     i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
  2226. }
  2227.  
  2228. static void
  2229. gen6_render_put_subpicture(
  2230.     VADriverContextP   ctx,
  2231.     VASurfaceID        surface,
  2232.     const VARectangle *src_rect,
  2233.     const VARectangle *dst_rect
  2234. )
  2235. {
  2236.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2237.     struct intel_batchbuffer *batch = i965->batch;
  2238.     struct object_surface *obj_surface = SURFACE(surface);
  2239.     unsigned int index = obj_surface->subpic_render_idx;
  2240.     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic[index]);
  2241.  
  2242.     assert(obj_subpic);
  2243.     gen6_render_initialize(ctx);
  2244.     gen6_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
  2245.     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
  2246.     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
  2247.     intel_batchbuffer_flush(batch);
  2248. }
  2249.  
  2250. /*
  2251.  * for GEN7
  2252.  */
  2253. static void
  2254. gen7_render_initialize(VADriverContextP ctx)
  2255. {
  2256.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2257.     struct i965_render_state *render_state = &i965->render_state;
  2258.     dri_bo *bo;
  2259.  
  2260.     /* VERTEX BUFFER */
  2261.     dri_bo_unreference(render_state->vb.vertex_buffer);
  2262.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2263.                       "vertex buffer",
  2264.                       4096,
  2265.                       4096);
  2266.     assert(bo);
  2267.     render_state->vb.vertex_buffer = bo;
  2268.  
  2269.     /* WM */
  2270.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  2271.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2272.                       "surface state & binding table",
  2273.                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
  2274.                       4096);
  2275.     assert(bo);
  2276.     render_state->wm.surface_state_binding_table_bo = bo;
  2277.  
  2278.     dri_bo_unreference(render_state->wm.sampler);
  2279.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2280.                       "sampler state",
  2281.                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
  2282.                       4096);
  2283.     assert(bo);
  2284.     render_state->wm.sampler = bo;
  2285.     render_state->wm.sampler_count = 0;
  2286.  
  2287.     /* COLOR CALCULATOR */
  2288.     dri_bo_unreference(render_state->cc.state);
  2289.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2290.                       "color calc state",
  2291.                       sizeof(struct gen6_color_calc_state),
  2292.                       4096);
  2293.     assert(bo);
  2294.     render_state->cc.state = bo;
  2295.  
  2296.     /* CC VIEWPORT */
  2297.     dri_bo_unreference(render_state->cc.viewport);
  2298.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2299.                       "cc viewport",
  2300.                       sizeof(struct i965_cc_viewport),
  2301.                       4096);
  2302.     assert(bo);
  2303.     render_state->cc.viewport = bo;
  2304.  
  2305.     /* BLEND STATE */
  2306.     dri_bo_unreference(render_state->cc.blend);
  2307.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2308.                       "blend state",
  2309.                       sizeof(struct gen6_blend_state),
  2310.                       4096);
  2311.     assert(bo);
  2312.     render_state->cc.blend = bo;
  2313.  
  2314.     /* DEPTH & STENCIL STATE */
  2315.     dri_bo_unreference(render_state->cc.depth_stencil);
  2316.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2317.                       "depth & stencil state",
  2318.                       sizeof(struct gen6_depth_stencil_state),
  2319.                       4096);
  2320.     assert(bo);
  2321.     render_state->cc.depth_stencil = bo;
  2322. }
  2323.  
  2324. static void
  2325. gen7_render_color_calc_state(VADriverContextP ctx)
  2326. {
  2327.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2328.     struct i965_render_state *render_state = &i965->render_state;
  2329.     struct gen6_color_calc_state *color_calc_state;
  2330.    
  2331.     dri_bo_map(render_state->cc.state, 1);
  2332.     assert(render_state->cc.state->virtual);
  2333.     color_calc_state = render_state->cc.state->virtual;
  2334.     memset(color_calc_state, 0, sizeof(*color_calc_state));
  2335.     color_calc_state->constant_r = 1.0;
  2336.     color_calc_state->constant_g = 0.0;
  2337.     color_calc_state->constant_b = 1.0;
  2338.     color_calc_state->constant_a = 1.0;
  2339.     dri_bo_unmap(render_state->cc.state);
  2340. }
  2341.  
  2342. static void
  2343. gen7_render_blend_state(VADriverContextP ctx)
  2344. {
  2345.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2346.     struct i965_render_state *render_state = &i965->render_state;
  2347.     struct gen6_blend_state *blend_state;
  2348.    
  2349.     dri_bo_map(render_state->cc.blend, 1);
  2350.     assert(render_state->cc.blend->virtual);
  2351.     blend_state = render_state->cc.blend->virtual;
  2352.     memset(blend_state, 0, sizeof(*blend_state));
  2353.     blend_state->blend1.logic_op_enable = 1;
  2354.     blend_state->blend1.logic_op_func = 0xc;
  2355.     blend_state->blend1.pre_blend_clamp_enable = 1;
  2356.     dri_bo_unmap(render_state->cc.blend);
  2357. }
  2358.  
  2359. static void
  2360. gen7_render_depth_stencil_state(VADriverContextP ctx)
  2361. {
  2362.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2363.     struct i965_render_state *render_state = &i965->render_state;
  2364.     struct gen6_depth_stencil_state *depth_stencil_state;
  2365.    
  2366.     dri_bo_map(render_state->cc.depth_stencil, 1);
  2367.     assert(render_state->cc.depth_stencil->virtual);
  2368.     depth_stencil_state = render_state->cc.depth_stencil->virtual;
  2369.     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
  2370.     dri_bo_unmap(render_state->cc.depth_stencil);
  2371. }
  2372.  
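        /*
         * GEN7 variant of the sampler setup: uses struct gen7_sampler_state
         * and programs bilinear min/mag filtering with clamp addressing on
         * all three texture coordinates for every active sampler.
         */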
  2373. static void
  2374. gen7_render_sampler(VADriverContextP ctx)
  2375. {
  2376.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2377.     struct i965_render_state *render_state = &i965->render_state;
  2378.     struct gen7_sampler_state *sampler_state;
  2379.     int i;
  2380.    
  2381.     assert(render_state->wm.sampler_count > 0);
  2382.     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
  2383.  
  2384.     dri_bo_map(render_state->wm.sampler, 1);
  2385.     assert(render_state->wm.sampler->virtual);
  2386.     sampler_state = render_state->wm.sampler->virtual;
  2387.     for (i = 0; i < render_state->wm.sampler_count; i++) {
  2388.         memset(sampler_state, 0, sizeof(*sampler_state));
  2389.         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
  2390.         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
  2391.         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  2392.         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  2393.         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  2394.         sampler_state++;
  2395.     }
  2396.  
  2397.     dri_bo_unmap(render_state->wm.sampler);
  2398. }
  2399.  
  2400. static void
  2401. gen7_render_setup_states(
  2402.     VADriverContextP   ctx,
  2403.     VASurfaceID        surface,
  2404.     const VARectangle *src_rect,
  2405.     const VARectangle *dst_rect,
  2406.     unsigned int       flags
  2407. )
  2408. {
  2409.     i965_render_dest_surface_state(ctx, 0);
  2410.     i965_render_src_surfaces_state(ctx, surface, flags);
  2411.     gen7_render_sampler(ctx);
  2412.     i965_render_cc_viewport(ctx);
  2413.     gen7_render_color_calc_state(ctx);
  2414.     gen7_render_blend_state(ctx);
  2415.     gen7_render_depth_stencil_state(ctx);
  2416.     i965_render_upload_constants(ctx, surface);
  2417.     i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
  2418. }
  2419.  
  2420. static void
  2421. gen7_emit_invarient_states(VADriverContextP ctx)
  2422. {
  2423.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2424.     struct intel_batchbuffer *batch = i965->batch;
  2425.  
  2426.     BEGIN_BATCH(batch, 1);
  2427.     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
  2428.     ADVANCE_BATCH(batch);
  2429.  
  2430.     BEGIN_BATCH(batch, 4);
  2431.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
  2432.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
  2433.               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
  2434.     OUT_BATCH(batch, 0);
  2435.     OUT_BATCH(batch, 0);
  2436.     ADVANCE_BATCH(batch);
  2437.  
  2438.     BEGIN_BATCH(batch, 2);
  2439.     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
  2440.     OUT_BATCH(batch, 1);
  2441.     ADVANCE_BATCH(batch);
  2442.  
  2443.     /* Set system instruction pointer */
  2444.     BEGIN_BATCH(batch, 2);
  2445.     OUT_BATCH(batch, CMD_STATE_SIP | 0);
  2446.     OUT_BATCH(batch, 0);
  2447.     ADVANCE_BATCH(batch);
  2448. }
  2449.  
  2450. static void
  2451. gen7_emit_state_base_address(VADriverContextP ctx)
  2452. {
  2453.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2454.     struct intel_batchbuffer *batch = i965->batch;
  2455.     struct i965_render_state *render_state = &i965->render_state;
  2456.  
  2457.     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
  2458.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
  2459.     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
  2460.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
  2461.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
  2462.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
  2463.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
  2464.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
  2465.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
  2466.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
  2467. }
  2468.  
  2469. static void
  2470. gen7_emit_viewport_state_pointers(VADriverContextP ctx)
  2471. {
  2472.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2473.     struct intel_batchbuffer *batch = i965->batch;
  2474.     struct i965_render_state *render_state = &i965->render_state;
  2475.  
  2476.     BEGIN_BATCH(batch, 2);
  2477.     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
  2478.     OUT_RELOC(batch,
  2479.               render_state->cc.viewport,
  2480.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2481.               0);
  2482.     ADVANCE_BATCH(batch);
  2483.  
  2484.     BEGIN_BATCH(batch, 2);
  2485.     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
  2486.     OUT_BATCH(batch, 0);
  2487.     ADVANCE_BATCH(batch);
  2488. }
  2489.  
  2490. /*
  2491.  * URB layout on GEN7
  2492.  * ----------------------------------------
  2493.  * | PS Push Constants (8KB) | VS entries |
  2494.  * ----------------------------------------
  2495.  */
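        /*
         * The code below reserves 8KB of PS push-constant space, then gives
         * the VS 32 URB entries (64 on Haswell) with an allocation-size field
         * of 2, starting one block past the push constants; GS, HS and DS get
         * no entries.
         */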
  2496. static void
  2497. gen7_emit_urb(VADriverContextP ctx)
  2498. {
  2499.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2500.     struct intel_batchbuffer *batch = i965->batch;
  2501.     unsigned int num_urb_entries = 32;
  2502.  
  2503.     if (IS_HASWELL(i965->intel.device_id))
  2504.         num_urb_entries = 64;
  2505.  
  2506.     BEGIN_BATCH(batch, 2);
  2507.     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
  2508.     OUT_BATCH(batch, 8); /* in 1KBs */
  2509.     ADVANCE_BATCH(batch);
  2510.  
  2511.     BEGIN_BATCH(batch, 2);
  2512.     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
  2513.     OUT_BATCH(batch,
  2514.               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
  2515.               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
  2516.               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2517.     ADVANCE_BATCH(batch);
  2518.  
  2519.     BEGIN_BATCH(batch, 2);
  2520.     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
  2521.     OUT_BATCH(batch,
  2522.               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  2523.               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2524.     ADVANCE_BATCH(batch);
  2525.  
  2526.     BEGIN_BATCH(batch, 2);
  2527.     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
  2528.     OUT_BATCH(batch,
  2529.               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  2530.               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2531.     ADVANCE_BATCH(batch);
  2532.  
  2533.     BEGIN_BATCH(batch, 2);
  2534.     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
  2535.     OUT_BATCH(batch,
  2536.               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  2537.               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2538.     ADVANCE_BATCH(batch);
  2539. }
  2540.  
  2541. static void
  2542. gen7_emit_cc_state_pointers(VADriverContextP ctx)
  2543. {
  2544.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2545.     struct intel_batchbuffer *batch = i965->batch;
  2546.     struct i965_render_state *render_state = &i965->render_state;
  2547.  
  2548.     BEGIN_BATCH(batch, 2);
  2549.     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
  2550.     OUT_RELOC(batch,
  2551.               render_state->cc.state,
  2552.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2553.               1);
  2554.     ADVANCE_BATCH(batch);
  2555.  
  2556.     BEGIN_BATCH(batch, 2);
  2557.     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
  2558.     OUT_RELOC(batch,
  2559.               render_state->cc.blend,
  2560.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2561.               1);
  2562.     ADVANCE_BATCH(batch);
  2563.  
  2564.     BEGIN_BATCH(batch, 2);
  2565.     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
  2566.     OUT_RELOC(batch,
  2567.               render_state->cc.depth_stencil,
  2568.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2569.               1);
  2570.     ADVANCE_BATCH(batch);
  2571. }
  2572.  
  2573. static void
  2574. gen7_emit_sampler_state_pointers(VADriverContextP ctx)
  2575. {
  2576.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2577.     struct intel_batchbuffer *batch = i965->batch;
  2578.     struct i965_render_state *render_state = &i965->render_state;
  2579.  
  2580.     BEGIN_BATCH(batch, 2);
  2581.     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
  2582.     OUT_RELOC(batch,
  2583.               render_state->wm.sampler,
  2584.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2585.               0);
  2586.     ADVANCE_BATCH(batch);
  2587. }
  2588.  
  2589. static void
  2590. gen7_emit_binding_table(VADriverContextP ctx)
  2591. {
  2592.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2593.     struct intel_batchbuffer *batch = i965->batch;
  2594.  
  2595.     BEGIN_BATCH(batch, 2);
  2596.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
  2597.     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
  2598.     ADVANCE_BATCH(batch);
  2599. }
  2600.  
  2601. static void
  2602. gen7_emit_depth_buffer_state(VADriverContextP ctx)
  2603. {
  2604.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2605.     struct intel_batchbuffer *batch = i965->batch;
  2606.  
  2607.     BEGIN_BATCH(batch, 7);
  2608.     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
  2609.     OUT_BATCH(batch,
  2610.               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
  2611.               (I965_SURFACE_NULL << 29));
  2612.     OUT_BATCH(batch, 0);
  2613.     OUT_BATCH(batch, 0);
  2614.     OUT_BATCH(batch, 0);
  2615.     OUT_BATCH(batch, 0);
  2616.     OUT_BATCH(batch, 0);
  2617.     ADVANCE_BATCH(batch);
  2618.  
  2619.     BEGIN_BATCH(batch, 3);
  2620.     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
  2621.     OUT_BATCH(batch, 0);
  2622.     OUT_BATCH(batch, 0);
  2623.     ADVANCE_BATCH(batch);
  2624. }
  2625.  
  2626. static void
  2627. gen7_emit_drawing_rectangle(VADriverContextP ctx)
  2628. {
  2629.     i965_render_drawing_rectangle(ctx);
  2630. }
  2631.  
  2632. static void
  2633. gen7_emit_vs_state(VADriverContextP ctx)
  2634. {
  2635.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2636.     struct intel_batchbuffer *batch = i965->batch;
  2637.  
  2638.     /* disable VS constant buffer */
  2639.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
  2640.     OUT_BATCH(batch, 0);
  2641.     OUT_BATCH(batch, 0);
  2642.     OUT_BATCH(batch, 0);
  2643.     OUT_BATCH(batch, 0);
  2644.     OUT_BATCH(batch, 0);
  2645.     OUT_BATCH(batch, 0);
  2646.        
  2647.     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
  2648.     OUT_BATCH(batch, 0); /* without VS kernel */
  2649.     OUT_BATCH(batch, 0);
  2650.     OUT_BATCH(batch, 0);
  2651.     OUT_BATCH(batch, 0);
  2652.     OUT_BATCH(batch, 0); /* pass-through */
  2653. }
  2654.  
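        /*
         * Disable every geometry stage this path does not use: GS, HS, the
         * tessellator (TE), DS and stream-out are all programmed empty and
         * their binding-table pointers cleared.
         */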
  2655. static void
  2656. gen7_emit_bypass_state(VADriverContextP ctx)
  2657. {
  2658.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2659.     struct intel_batchbuffer *batch = i965->batch;
  2660.  
  2661.     /* bypass GS */
  2662.     BEGIN_BATCH(batch, 7);
  2663.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
  2664.     OUT_BATCH(batch, 0);
  2665.     OUT_BATCH(batch, 0);
  2666.     OUT_BATCH(batch, 0);
  2667.     OUT_BATCH(batch, 0);
  2668.     OUT_BATCH(batch, 0);
  2669.     OUT_BATCH(batch, 0);
  2670.     ADVANCE_BATCH(batch);
  2671.  
  2672.     BEGIN_BATCH(batch, 7);     
  2673.     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
  2674.     OUT_BATCH(batch, 0); /* without GS kernel */
  2675.     OUT_BATCH(batch, 0);
  2676.     OUT_BATCH(batch, 0);
  2677.     OUT_BATCH(batch, 0);
  2678.     OUT_BATCH(batch, 0);
  2679.     OUT_BATCH(batch, 0); /* pass-through */
  2680.     ADVANCE_BATCH(batch);
  2681.  
  2682.     BEGIN_BATCH(batch, 2);
  2683.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
  2684.     OUT_BATCH(batch, 0);
  2685.     ADVANCE_BATCH(batch);
  2686.  
  2687.     /* disable HS */
  2688.     BEGIN_BATCH(batch, 7);
  2689.     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
  2690.     OUT_BATCH(batch, 0);
  2691.     OUT_BATCH(batch, 0);
  2692.     OUT_BATCH(batch, 0);
  2693.     OUT_BATCH(batch, 0);
  2694.     OUT_BATCH(batch, 0);
  2695.     OUT_BATCH(batch, 0);
  2696.     ADVANCE_BATCH(batch);
  2697.  
  2698.     BEGIN_BATCH(batch, 7);
  2699.     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
  2700.     OUT_BATCH(batch, 0);
  2701.     OUT_BATCH(batch, 0);
  2702.     OUT_BATCH(batch, 0);
  2703.     OUT_BATCH(batch, 0);
  2704.     OUT_BATCH(batch, 0);
  2705.     OUT_BATCH(batch, 0);
  2706.     ADVANCE_BATCH(batch);
  2707.  
  2708.     BEGIN_BATCH(batch, 2);
  2709.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
  2710.     OUT_BATCH(batch, 0);
  2711.     ADVANCE_BATCH(batch);
  2712.  
  2713.     /* Disable TE */
  2714.     BEGIN_BATCH(batch, 4);
  2715.     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
  2716.     OUT_BATCH(batch, 0);
  2717.     OUT_BATCH(batch, 0);
  2718.     OUT_BATCH(batch, 0);
  2719.     ADVANCE_BATCH(batch);
  2720.  
  2721.     /* Disable DS */
  2722.     BEGIN_BATCH(batch, 7);
  2723.     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
  2724.     OUT_BATCH(batch, 0);
  2725.     OUT_BATCH(batch, 0);
  2726.     OUT_BATCH(batch, 0);
  2727.     OUT_BATCH(batch, 0);
  2728.     OUT_BATCH(batch, 0);
  2729.     OUT_BATCH(batch, 0);
  2730.     ADVANCE_BATCH(batch);
  2731.  
  2732.     BEGIN_BATCH(batch, 6);
  2733.     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
  2734.     OUT_BATCH(batch, 0);
  2735.     OUT_BATCH(batch, 0);
  2736.     OUT_BATCH(batch, 0);
  2737.     OUT_BATCH(batch, 0);
  2738.     OUT_BATCH(batch, 0);
  2739.     ADVANCE_BATCH(batch);
  2740.  
  2741.     BEGIN_BATCH(batch, 2);
  2742.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
  2743.     OUT_BATCH(batch, 0);
  2744.     ADVANCE_BATCH(batch);
  2745.  
  2746.     /* Disable STREAMOUT */
  2747.     BEGIN_BATCH(batch, 3);
  2748.     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
  2749.     OUT_BATCH(batch, 0);
  2750.     OUT_BATCH(batch, 0);
  2751.     ADVANCE_BATCH(batch);
  2752. }
  2753.  
  2754. static void
  2755. gen7_emit_clip_state(VADriverContextP ctx)
  2756. {
  2757.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2758.     struct intel_batchbuffer *batch = i965->batch;
  2759.  
  2760.     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
  2761.     OUT_BATCH(batch, 0);
  2762.     OUT_BATCH(batch, 0); /* pass-through */
  2763.     OUT_BATCH(batch, 0);
  2764. }
  2765.  
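/*
 * SBE passes a single attribute from the URB entry through to the pixel
 * shader, and SF is programmed with no culling so the RECTLIST covering
 * the destination is always rasterized.
 */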
  2766. static void
  2767. gen7_emit_sf_state(VADriverContextP ctx)
  2768. {
  2769.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2770.     struct intel_batchbuffer *batch = i965->batch;
  2771.  
  2772.     BEGIN_BATCH(batch, 14);
  2773.     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
  2774.     OUT_BATCH(batch,
  2775.               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
  2776.               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
  2777.               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
  2778.     OUT_BATCH(batch, 0);
  2779.     OUT_BATCH(batch, 0);
  2780.     OUT_BATCH(batch, 0); /* DW4 */
  2781.     OUT_BATCH(batch, 0);
  2782.     OUT_BATCH(batch, 0);
  2783.     OUT_BATCH(batch, 0);
  2784.     OUT_BATCH(batch, 0);
  2785.     OUT_BATCH(batch, 0); /* DW9 */
  2786.     OUT_BATCH(batch, 0);
  2787.     OUT_BATCH(batch, 0);
  2788.     OUT_BATCH(batch, 0);
  2789.     OUT_BATCH(batch, 0);
  2790.     ADVANCE_BATCH(batch);
  2791.  
  2792.     BEGIN_BATCH(batch, 7);
  2793.     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
  2794.     OUT_BATCH(batch, 0);
  2795.     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
  2796.     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
  2797.     OUT_BATCH(batch, 0);
  2798.     OUT_BATCH(batch, 0);
  2799.     OUT_BATCH(batch, 0);
  2800.     ADVANCE_BATCH(batch);
  2801. }
  2802.  
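/*
 * WM/PS setup: push constants come from the CURBE buffer, the pixel
 * shader kernel selected by 'kernel' (video or subpicture) is bound, and
 * the maximum thread count is taken from render_state->max_wm_threads.
 * Haswell uses different bit positions for the max-thread count and adds
 * a sample mask, hence the shift/sample adjustments at the top of the
 * function.
 */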
  2803. static void
  2804. gen7_emit_wm_state(VADriverContextP ctx, int kernel)
  2805. {
  2806.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2807.     struct intel_batchbuffer *batch = i965->batch;
  2808.     struct i965_render_state *render_state = &i965->render_state;
  2809.     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
  2810.     unsigned int num_samples = 0;
  2811.  
  2812.     if (IS_HASWELL(i965->intel.device_id)) {
  2813.         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
  2814.         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
  2815.     }
  2816.  
  2817.     BEGIN_BATCH(batch, 3);
  2818.     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
  2819.     OUT_BATCH(batch,
  2820.               GEN7_WM_DISPATCH_ENABLE |
  2821.               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
  2822.     OUT_BATCH(batch, 0);
  2823.     ADVANCE_BATCH(batch);
  2824.  
  2825.     BEGIN_BATCH(batch, 7);
  2826.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
  2827.     OUT_BATCH(batch, 1);
  2828.     OUT_BATCH(batch, 0);
  2829.     OUT_RELOC(batch,
  2830.               render_state->curbe.bo,
  2831.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2832.               0);
  2833.     OUT_BATCH(batch, 0);
  2834.     OUT_BATCH(batch, 0);
  2835.     OUT_BATCH(batch, 0);
  2836.     ADVANCE_BATCH(batch);
  2837.  
  2838.     BEGIN_BATCH(batch, 8);
  2839.     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
  2840.     OUT_RELOC(batch,
  2841.               render_state->render_kernels[kernel].bo,
  2842.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2843.               0);
  2844.     OUT_BATCH(batch,
  2845.               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
  2846.               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
  2847.     OUT_BATCH(batch, 0); /* scratch space base offset */
  2848.     OUT_BATCH(batch,
  2849.               ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
  2850.               GEN7_PS_PUSH_CONSTANT_ENABLE |
  2851.               GEN7_PS_ATTRIBUTE_ENABLE |
  2852.               GEN7_PS_16_DISPATCH_ENABLE);
  2853.     OUT_BATCH(batch,
  2854.               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
  2855.     OUT_BATCH(batch, 0); /* kernel 1 pointer */
  2856.     OUT_BATCH(batch, 0); /* kernel 2 pointer */
  2857.     ADVANCE_BATCH(batch);
  2858. }
  2859.  
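/*
 * Each vertex consumed below is four floats (16 bytes): one float pair
 * fetched at offset 0 and a second pair at offset 8, each expanded by
 * the vertex fetcher to a full {a, b, 1.0, 1.0} element (see the
 * per-element comments below). The struct that follows is illustrative
 * only and is not referenced by the driver; it simply spells out the
 * stride implied by these two elements and by the (4 * 4) pitch
 * programmed in gen7_emit_vertices().
 */
struct gen7_render_vertex_example {
    float pair0[2]; /* fetched at offset 0 */
    float pair1[2]; /* fetched at offset 8 */
};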
  2860. static void
  2861. gen7_emit_vertex_element_state(VADriverContextP ctx)
  2862. {
  2863.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2864.     struct intel_batchbuffer *batch = i965->batch;
  2865.  
  2866.     /* Set up our vertex elements, sourced from the single vertex buffer. */
  2867.     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
  2868.     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  2869.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2870.               GEN6_VE0_VALID |
  2871.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2872.               (0 << VE0_OFFSET_SHIFT));
  2873.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2874.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2875.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2876.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2877.     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  2878.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2879.               GEN6_VE0_VALID |
  2880.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2881.               (8 << VE0_OFFSET_SHIFT));
  2882.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2883.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2884.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2885.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2886. }
  2887.  
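/*
 * Vertex emission: a single vertex buffer holding three 16-byte vertices
 * (the second relocation marks the end of that data via the 12 * 4
 * offset), drawn as a single-instance RECTLIST with 3DPRIMITIVE.
 */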
  2888. static void
  2889. gen7_emit_vertices(VADriverContextP ctx)
  2890. {
  2891.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2892.     struct intel_batchbuffer *batch = i965->batch;
  2893.     struct i965_render_state *render_state = &i965->render_state;
  2894.  
  2895.     BEGIN_BATCH(batch, 5);
  2896.     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
  2897.     OUT_BATCH(batch,
  2898.               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
  2899.               GEN6_VB0_VERTEXDATA |
  2900.               GEN7_VB0_ADDRESS_MODIFYENABLE |
  2901.               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
  2902.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
  2903.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
  2904.     OUT_BATCH(batch, 0);
  2905.     ADVANCE_BATCH(batch);
  2906.  
  2907.     BEGIN_BATCH(batch, 7);
  2908.     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
  2909.     OUT_BATCH(batch,
  2910.               _3DPRIM_RECTLIST |
  2911.               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
  2912.     OUT_BATCH(batch, 3); /* vertex count per instance */
  2913.     OUT_BATCH(batch, 0); /* start vertex offset */
  2914.     OUT_BATCH(batch, 1); /* single instance */
  2915.     OUT_BATCH(batch, 0); /* start instance location */
  2916.     OUT_BATCH(batch, 0);
  2917.     ADVANCE_BATCH(batch);
  2918. }
  2919.  
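/*
 * gen7_render_emit_states() assembles the complete 3D pipeline setup for
 * one blit: invariant state, base addresses, viewport/URB/CC/sampler
 * pointers, the bypassed geometry stages, VS/CLIP/SF/WM, binding table,
 * depth buffer, drawing rectangle, vertex elements and finally the
 * primitive itself, all inside a single atomic batch section.
 */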
  2920. static void
  2921. gen7_render_emit_states(VADriverContextP ctx, int kernel)
  2922. {
  2923.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2924.     struct intel_batchbuffer *batch = i965->batch;
  2925.  
  2926.     intel_batchbuffer_start_atomic(batch, 0x1000);
  2927.     intel_batchbuffer_emit_mi_flush(batch);
  2928.     gen7_emit_invarient_states(ctx);
  2929.     gen7_emit_state_base_address(ctx);
  2930.     gen7_emit_viewport_state_pointers(ctx);
  2931.     gen7_emit_urb(ctx);
  2932.     gen7_emit_cc_state_pointers(ctx);
  2933.     gen7_emit_sampler_state_pointers(ctx);
  2934.     gen7_emit_bypass_state(ctx);
  2935.     gen7_emit_vs_state(ctx);
  2936.     gen7_emit_clip_state(ctx);
  2937.     gen7_emit_sf_state(ctx);
  2938.     gen7_emit_wm_state(ctx, kernel);
  2939.     gen7_emit_binding_table(ctx);
  2940.     gen7_emit_depth_buffer_state(ctx);
  2941.     gen7_emit_drawing_rectangle(ctx);
  2942.     gen7_emit_vertex_element_state(ctx);
  2943.     gen7_emit_vertices(ctx);
  2944.     intel_batchbuffer_end_atomic(batch);
  2945. }
  2946.  
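/*
 * Top-level gen7 path for rendering a video surface: set up the render
 * state objects, fill the per-frame state (surfaces, sampler, CC and
 * vertex data), clear any border area of the destination, emit the batch
 * built above and flush it to the GPU.
 */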
  2947. static void
  2948. gen7_render_put_surface(
  2949.     VADriverContextP   ctx,
  2950.     VASurfaceID        surface,
  2951.     const VARectangle *src_rect,
  2952.     const VARectangle *dst_rect,
  2953.     unsigned int       flags
  2954. )
  2955. {
  2956.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2957.     struct intel_batchbuffer *batch = i965->batch;
  2958.  
  2959.     gen7_render_initialize(ctx);
  2960.     gen7_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
  2961.     i965_clear_dest_region(ctx);
  2962.     gen7_render_emit_states(ctx, PS_KERNEL);
  2963.     intel_batchbuffer_flush(batch);
  2964. }
  2965.  
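/*
 * Subpicture blending uses a standard "source over" equation
 * (src * alpha + dst * (1 - alpha)) with pre- and post-blend clamping,
 * so the subpicture is composited on top of the already-rendered frame.
 */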
  2966. static void
  2967. gen7_subpicture_render_blend_state(VADriverContextP ctx)
  2968. {
  2969.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2970.     struct i965_render_state *render_state = &i965->render_state;
  2971.     struct gen6_blend_state *blend_state;
  2972.  
  2973.     dri_bo_unmap(render_state->cc.state);    
  2974.     dri_bo_map(render_state->cc.blend, 1);
  2975.     assert(render_state->cc.blend->virtual);
  2976.     blend_state = render_state->cc.blend->virtual;
  2977.     memset(blend_state, 0, sizeof(*blend_state));
  2978.     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
  2979.     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
  2980.     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
  2981.     blend_state->blend0.blend_enable = 1;
  2982.     blend_state->blend1.post_blend_clamp_enable = 1;
  2983.     blend_state->blend1.pre_blend_clamp_enable = 1;
  2984.     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
  2985.     dri_bo_unmap(render_state->cc.blend);
  2986. }
  2987.  
  2988. static void
  2989. gen7_subpicture_render_setup_states(
  2990.     VADriverContextP   ctx,
  2991.     VASurfaceID        surface,
  2992.     const VARectangle *src_rect,
  2993.     const VARectangle *dst_rect
  2994. )
  2995. {
  2996.     i965_render_dest_surface_state(ctx, 0);
  2997.     i965_subpic_render_src_surfaces_state(ctx, surface);
  2998.     i965_render_sampler(ctx);
  2999.     i965_render_cc_viewport(ctx);
  3000.     gen7_render_color_calc_state(ctx);
  3001.     gen7_subpicture_render_blend_state(ctx);
  3002.     gen7_render_depth_stencil_state(ctx);
  3003.     i965_subpic_render_upload_constants(ctx, surface);
  3004.     i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
  3005. }
  3006.  
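/*
 * Subpicture path: the same batch construction as the surface path, but
 * with the ARGB sampling kernel, blending enabled and the subpicture
 * palette uploaded before the batch is flushed.
 */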
  3007. static void
  3008. gen7_render_put_subpicture(
  3009.     VADriverContextP   ctx,
  3010.     VASurfaceID        surface,
  3011.     const VARectangle *src_rect,
  3012.     const VARectangle *dst_rect
  3013. )
  3014. {
  3015.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3016.     struct intel_batchbuffer *batch = i965->batch;
  3017.     struct object_surface *obj_surface = SURFACE(surface);
  3018.     unsigned int index = obj_surface->subpic_render_idx;
  3019.     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic[index]);
  3020.  
  3021.     assert(obj_subpic);
  3022.     gen7_render_initialize(ctx);
  3023.     gen7_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
  3024.     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
  3025.     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
  3026.     intel_batchbuffer_flush(batch);
  3027. }
  3028.  
  3029.  
  3030. /*
  3031.  * global functions
  3032.  */
  3033. VAStatus
  3034. i965_DestroySurfaces(VADriverContextP ctx,
  3035.                      VASurfaceID *surface_list,
  3036.                      int num_surfaces);
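/*
 * intel_render_put_surface() is the generation-independent entry point:
 * it first gives post-processing a chance to scale or convert the
 * surface, then dispatches to the GEN7, GEN6 or pre-GEN6 render path,
 * and finally destroys any temporary surface post-processing created.
 *
 * A minimal usage sketch, assuming the caller already holds a valid
 * surface ID (the variable names and sizes here are illustrative only):
 *
 *     VARectangle src = { 0, 0, width, height };
 *     VARectangle dst = { 0, 0, out_w, out_h };
 *     intel_render_put_surface(ctx, surface_id, &src, &dst,
 *                              VA_FRAME_PICTURE);
 */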
  3037. void
  3038. intel_render_put_surface(
  3039.     VADriverContextP   ctx,
  3040.     VASurfaceID        surface,
  3041.     const VARectangle *src_rect,
  3042.     const VARectangle *dst_rect,
  3043.     unsigned int       flags
  3044. )
  3045. {
  3046.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3047.     int has_done_scaling = 0;
  3048.     VASurfaceID in_surface_id = surface;
  3049.     VASurfaceID out_surface_id = i965_post_processing(ctx, surface, src_rect, dst_rect, flags, &has_done_scaling);
  3050.  
  3051.     assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
  3052.  
  3053.     if (out_surface_id != VA_INVALID_ID)
  3054.         in_surface_id = out_surface_id;
  3055.  
  3056.     if (IS_GEN7(i965->intel.device_id))
  3057.         gen7_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
  3058.     else if (IS_GEN6(i965->intel.device_id))
  3059.         gen6_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
  3060.     else
  3061.         i965_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
  3062.  
  3063.     if (in_surface_id != surface)
  3064.         i965_DestroySurfaces(ctx, &in_surface_id, 1);
  3065. }
  3066.  
  3067. void
  3068. intel_render_put_subpicture(
  3069.     VADriverContextP   ctx,
  3070.     VASurfaceID        surface,
  3071.     const VARectangle *src_rect,
  3072.     const VARectangle *dst_rect
  3073. )
  3074. {
  3075.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3076.  
  3077.     if (IS_GEN7(i965->intel.device_id))
  3078.         gen7_render_put_subpicture(ctx, surface, src_rect, dst_rect);
  3079.     else if (IS_GEN6(i965->intel.device_id))
  3080.         gen6_render_put_subpicture(ctx, surface, src_rect, dst_rect);
  3081.     else
  3082.         i965_render_put_subpicture(ctx, surface, src_rect, dst_rect);
  3083. }
  3084.  
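/*
 * i965_render_init() picks the shader kernel set for the detected GPU
 * generation, uploads each kernel into its own GEM buffer object,
 * allocates the 4 KB constant (CURBE) buffer and records the maximum
 * number of WM threads for the specific GT variant.
 */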
  3085. Bool
  3086. i965_render_init(VADriverContextP ctx)
  3087. {
  3088.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3089.     struct i965_render_state *render_state = &i965->render_state;
  3090.     int i;
  3091.  
  3092.     printf("%s\n", __FUNCTION__);
  3093.  
  3094.     /* kernel */
  3095.     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
  3096.                                  sizeof(render_kernels_gen5[0])));
  3097.     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
  3098.                                  sizeof(render_kernels_gen6[0])));
  3099.  
  3100.     if (IS_GEN7(i965->intel.device_id))
  3101.         memcpy(render_state->render_kernels,
  3102.                (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
  3103.                sizeof(render_state->render_kernels));
  3104.     else if (IS_GEN6(i965->intel.device_id))
  3105.         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
  3106.     else if (IS_IRONLAKE(i965->intel.device_id))
  3107.         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
  3108.     else
  3109.         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
  3110.  
  3111.     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
  3112.         struct i965_kernel *kernel = &render_state->render_kernels[i];
  3113.  
  3114.         if (!kernel->size)
  3115.             continue;
  3116.  
  3117.         kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
  3118.                                   kernel->name,
  3119.                                   kernel->size, 0x1000);
  3120.         assert(kernel->bo);
  3121.         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
  3122.     }
  3123.  
  3124.     /* constant buffer */
  3125.     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
  3126.                       "constant buffer",
  3127.                       4096, 64);
  3128.     assert(render_state->curbe.bo);
  3129.  
  3130.     if (IS_IVB_GT1(i965->intel.device_id) ||
  3131.         IS_HSW_GT1(i965->intel.device_id)) {
  3132.         render_state->max_wm_threads = 48;
  3133.     } else if (IS_IVB_GT2(i965->intel.device_id) ||
  3134.                IS_HSW_GT2(i965->intel.device_id)) {
  3135.         render_state->max_wm_threads = 172;
  3136.     } else if (IS_SNB_GT1(i965->intel.device_id)) {
  3137.         render_state->max_wm_threads = 40;
  3138.     } else if (IS_SNB_GT2(i965->intel.device_id)) {
  3139.         render_state->max_wm_threads = 80;
  3140.     } else if (IS_IRONLAKE(i965->intel.device_id)) {
  3141.         render_state->max_wm_threads = 72; /* 12 * 6 */
  3142.     } else if (IS_G4X(i965->intel.device_id)) {
  3143.         render_state->max_wm_threads = 50; /* 10 * 5 */
  3144.     } else {
  3145.         /* should never get here !!! */
  3146.         assert(0);
  3147.     }
  3148.  
  3149.     return True;
  3150. }
  3151.  
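/*
 * i965_render_terminate() drops every buffer object allocated by the
 * render code (kernels, CURBE, vertex buffer, fixed-function and CC
 * state) and releases the cached draw region.
 */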
  3152. Bool
  3153. i965_render_terminate(VADriverContextP ctx)
  3154. {
  3155.     int i;
  3156.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3157.     struct i965_render_state *render_state = &i965->render_state;
  3158.  
  3159.     dri_bo_unreference(render_state->curbe.bo);
  3160.     render_state->curbe.bo = NULL;
  3161.  
  3162.     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
  3163.         struct i965_kernel *kernel = &render_state->render_kernels[i];
  3164.        
  3165.         dri_bo_unreference(kernel->bo);
  3166.         kernel->bo = NULL;
  3167.     }
  3168.  
  3169.     dri_bo_unreference(render_state->vb.vertex_buffer);
  3170.     render_state->vb.vertex_buffer = NULL;
  3171.     dri_bo_unreference(render_state->vs.state);
  3172.     render_state->vs.state = NULL;
  3173.     dri_bo_unreference(render_state->sf.state);
  3174.     render_state->sf.state = NULL;
  3175.     dri_bo_unreference(render_state->wm.sampler);
  3176.     render_state->wm.sampler = NULL;
  3177.     dri_bo_unreference(render_state->wm.state);
  3178.     render_state->wm.state = NULL;
  3179.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  3180.     dri_bo_unreference(render_state->cc.viewport);
  3181.     render_state->cc.viewport = NULL;
  3182.     dri_bo_unreference(render_state->cc.state);
  3183.     render_state->cc.state = NULL;
  3184.     dri_bo_unreference(render_state->cc.blend);
  3185.     render_state->cc.blend = NULL;
  3186.     dri_bo_unreference(render_state->cc.depth_stencil);
  3187.     render_state->cc.depth_stencil = NULL;
  3188.  
  3189.     if (render_state->draw_region) {
  3190.         dri_bo_unreference(render_state->draw_region->bo);
  3191.         free(render_state->draw_region);
  3192.         render_state->draw_region = NULL;
  3193.     }
  3194.  
  3195.     return True;
  3196. }
  3197.  
  3198.