
  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2012-2013 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "util/u_dual_blend.h"
  29. #include "util/u_half.h"
  30. #include "brw_defines.h"
  31. #include "intel_reg.h"
  32.  
  33. #include "ilo_context.h"
  34. #include "ilo_cp.h"
  35. #include "ilo_format.h"
  36. #include "ilo_resource.h"
  37. #include "ilo_shader.h"
  38. #include "ilo_state.h"
  39. #include "ilo_gpe_gen6.h"
  40.  
  41. /**
  42.  * Translate winsys tiling to hardware tiling.
  43.  */
  44. int
  45. ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
  46. {
  47.    switch (tiling) {
  48.    case INTEL_TILING_NONE:
  49.       return 0;
  50.    case INTEL_TILING_X:
  51.       return BRW_SURFACE_TILED;
  52.    case INTEL_TILING_Y:
  53.       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
  54.    default:
  55.       assert(!"unknown tiling");
  56.       return 0;
  57.    }
  58. }
  59.  
  60. /**
  61.  * Translate a pipe primitive type to the matching hardware primitive type.
  62.  */
  63. int
  64. ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
  65. {
  66.    static const int prim_mapping[PIPE_PRIM_MAX] = {
  67.       [PIPE_PRIM_POINTS]                     = _3DPRIM_POINTLIST,
  68.       [PIPE_PRIM_LINES]                      = _3DPRIM_LINELIST,
  69.       [PIPE_PRIM_LINE_LOOP]                  = _3DPRIM_LINELOOP,
  70.       [PIPE_PRIM_LINE_STRIP]                 = _3DPRIM_LINESTRIP,
  71.       [PIPE_PRIM_TRIANGLES]                  = _3DPRIM_TRILIST,
  72.       [PIPE_PRIM_TRIANGLE_STRIP]             = _3DPRIM_TRISTRIP,
  73.       [PIPE_PRIM_TRIANGLE_FAN]               = _3DPRIM_TRIFAN,
  74.       [PIPE_PRIM_QUADS]                      = _3DPRIM_QUADLIST,
  75.       [PIPE_PRIM_QUAD_STRIP]                 = _3DPRIM_QUADSTRIP,
  76.       [PIPE_PRIM_POLYGON]                    = _3DPRIM_POLYGON,
  77.       [PIPE_PRIM_LINES_ADJACENCY]            = _3DPRIM_LINELIST_ADJ,
  78.       [PIPE_PRIM_LINE_STRIP_ADJACENCY]       = _3DPRIM_LINESTRIP_ADJ,
  79.       [PIPE_PRIM_TRIANGLES_ADJACENCY]        = _3DPRIM_TRILIST_ADJ,
  80.       [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]   = _3DPRIM_TRISTRIP_ADJ,
  81.    };
  82.  
  83.    assert(prim_mapping[prim]);
  84.  
  85.    return prim_mapping[prim];
  86. }
  87.  
  88. /**
  89.  * Translate a pipe texture target to the matching hardware surface type.
  90.  */
  91. int
  92. ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
  93. {
  94.    switch (target) {
  95.    case PIPE_BUFFER:
  96.       return BRW_SURFACE_BUFFER;
  97.    case PIPE_TEXTURE_1D:
  98.    case PIPE_TEXTURE_1D_ARRAY:
  99.       return BRW_SURFACE_1D;
  100.    case PIPE_TEXTURE_2D:
  101.    case PIPE_TEXTURE_RECT:
  102.    case PIPE_TEXTURE_2D_ARRAY:
  103.       return BRW_SURFACE_2D;
  104.    case PIPE_TEXTURE_3D:
  105.       return BRW_SURFACE_3D;
  106.    case PIPE_TEXTURE_CUBE:
  107.    case PIPE_TEXTURE_CUBE_ARRAY:
  108.       return BRW_SURFACE_CUBE;
  109.    default:
  110.       assert(!"unknown texture target");
  111.       return BRW_SURFACE_BUFFER;
  112.    }
  113. }
  114.  
  115. /**
  116.  * Translate a depth/stencil pipe format to the matching hardware
  117.  * format.  Return -1 on errors.
  118.  */
  119. static int
  120. gen6_translate_depth_format(enum pipe_format format)
  121. {
  122.    switch (format) {
  123.    case PIPE_FORMAT_Z16_UNORM:
  124.       return BRW_DEPTHFORMAT_D16_UNORM;
  125.    case PIPE_FORMAT_Z32_FLOAT:
  126.       return BRW_DEPTHFORMAT_D32_FLOAT;
  127.    case PIPE_FORMAT_Z24X8_UNORM:
  128.       return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
  129.    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
  130.       return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
  131.    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
  132.       return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
  133.    default:
  134.       return -1;
  135.    }
  136. }
  137.  
  138. /**
  139.  * Translate a pipe logicop to the matching hardware logicop.
  140.  */
  141. static int
  142. gen6_translate_pipe_logicop(unsigned logicop)
  143. {
  144.    switch (logicop) {
  145.    case PIPE_LOGICOP_CLEAR:         return BRW_LOGICOPFUNCTION_CLEAR;
  146.    case PIPE_LOGICOP_NOR:           return BRW_LOGICOPFUNCTION_NOR;
  147.    case PIPE_LOGICOP_AND_INVERTED:  return BRW_LOGICOPFUNCTION_AND_INVERTED;
  148.    case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
  149.    case PIPE_LOGICOP_AND_REVERSE:   return BRW_LOGICOPFUNCTION_AND_REVERSE;
  150.    case PIPE_LOGICOP_INVERT:        return BRW_LOGICOPFUNCTION_INVERT;
  151.    case PIPE_LOGICOP_XOR:           return BRW_LOGICOPFUNCTION_XOR;
  152.    case PIPE_LOGICOP_NAND:          return BRW_LOGICOPFUNCTION_NAND;
  153.    case PIPE_LOGICOP_AND:           return BRW_LOGICOPFUNCTION_AND;
  154.    case PIPE_LOGICOP_EQUIV:         return BRW_LOGICOPFUNCTION_EQUIV;
  155.    case PIPE_LOGICOP_NOOP:          return BRW_LOGICOPFUNCTION_NOOP;
  156.    case PIPE_LOGICOP_OR_INVERTED:   return BRW_LOGICOPFUNCTION_OR_INVERTED;
  157.    case PIPE_LOGICOP_COPY:          return BRW_LOGICOPFUNCTION_COPY;
  158.    case PIPE_LOGICOP_OR_REVERSE:    return BRW_LOGICOPFUNCTION_OR_REVERSE;
  159.    case PIPE_LOGICOP_OR:            return BRW_LOGICOPFUNCTION_OR;
  160.    case PIPE_LOGICOP_SET:           return BRW_LOGICOPFUNCTION_SET;
  161.    default:
  162.       assert(!"unknown logicop function");
  163.       return BRW_LOGICOPFUNCTION_CLEAR;
  164.    }
  165. }
  166.  
  167. /**
  168.  * Translate a pipe blend function to the matching hardware blend function.
  169.  */
  170. static int
  171. gen6_translate_pipe_blend(unsigned blend)
  172. {
  173.    switch (blend) {
  174.    case PIPE_BLEND_ADD:                return BRW_BLENDFUNCTION_ADD;
  175.    case PIPE_BLEND_SUBTRACT:           return BRW_BLENDFUNCTION_SUBTRACT;
  176.    case PIPE_BLEND_REVERSE_SUBTRACT:   return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
  177.    case PIPE_BLEND_MIN:                return BRW_BLENDFUNCTION_MIN;
  178.    case PIPE_BLEND_MAX:                return BRW_BLENDFUNCTION_MAX;
  179.    default:
  180.       assert(!"unknown blend function");
  181.       return BRW_BLENDFUNCTION_ADD;
  182.    }
  183. }
  184.  
  185. /**
  186.  * Translate a pipe blend factor to the matching hardware blend factor.
  187.  */
  188. static int
  189. gen6_translate_pipe_blendfactor(unsigned blendfactor)
  190. {
  191.    switch (blendfactor) {
  192.    case PIPE_BLENDFACTOR_ONE:                return BRW_BLENDFACTOR_ONE;
  193.    case PIPE_BLENDFACTOR_SRC_COLOR:          return BRW_BLENDFACTOR_SRC_COLOR;
  194.    case PIPE_BLENDFACTOR_SRC_ALPHA:          return BRW_BLENDFACTOR_SRC_ALPHA;
  195.    case PIPE_BLENDFACTOR_DST_ALPHA:          return BRW_BLENDFACTOR_DST_ALPHA;
  196.    case PIPE_BLENDFACTOR_DST_COLOR:          return BRW_BLENDFACTOR_DST_COLOR;
  197.    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
  198.    case PIPE_BLENDFACTOR_CONST_COLOR:        return BRW_BLENDFACTOR_CONST_COLOR;
  199.    case PIPE_BLENDFACTOR_CONST_ALPHA:        return BRW_BLENDFACTOR_CONST_ALPHA;
  200.    case PIPE_BLENDFACTOR_SRC1_COLOR:         return BRW_BLENDFACTOR_SRC1_COLOR;
  201.    case PIPE_BLENDFACTOR_SRC1_ALPHA:         return BRW_BLENDFACTOR_SRC1_ALPHA;
  202.    case PIPE_BLENDFACTOR_ZERO:               return BRW_BLENDFACTOR_ZERO;
  203.    case PIPE_BLENDFACTOR_INV_SRC_COLOR:      return BRW_BLENDFACTOR_INV_SRC_COLOR;
  204.    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      return BRW_BLENDFACTOR_INV_SRC_ALPHA;
  205.    case PIPE_BLENDFACTOR_INV_DST_ALPHA:      return BRW_BLENDFACTOR_INV_DST_ALPHA;
  206.    case PIPE_BLENDFACTOR_INV_DST_COLOR:      return BRW_BLENDFACTOR_INV_DST_COLOR;
  207.    case PIPE_BLENDFACTOR_INV_CONST_COLOR:    return BRW_BLENDFACTOR_INV_CONST_COLOR;
  208.    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    return BRW_BLENDFACTOR_INV_CONST_ALPHA;
  209.    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:     return BRW_BLENDFACTOR_INV_SRC1_COLOR;
  210.    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:     return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
  211.    default:
  212.       assert(!"unknown blend factor");
  213.       return BRW_BLENDFACTOR_ONE;
  214.    }
  215. }
  216.  
  217. /**
  218.  * Translate a pipe stencil op to the matching hardware stencil op.
  219.  */
  220. static int
  221. gen6_translate_pipe_stencil_op(unsigned stencil_op)
  222. {
  223.    switch (stencil_op) {
  224.    case PIPE_STENCIL_OP_KEEP:       return BRW_STENCILOP_KEEP;
  225.    case PIPE_STENCIL_OP_ZERO:       return BRW_STENCILOP_ZERO;
  226.    case PIPE_STENCIL_OP_REPLACE:    return BRW_STENCILOP_REPLACE;
  227.    case PIPE_STENCIL_OP_INCR:       return BRW_STENCILOP_INCRSAT;
  228.    case PIPE_STENCIL_OP_DECR:       return BRW_STENCILOP_DECRSAT;
  229.    case PIPE_STENCIL_OP_INCR_WRAP:  return BRW_STENCILOP_INCR;
  230.    case PIPE_STENCIL_OP_DECR_WRAP:  return BRW_STENCILOP_DECR;
  231.    case PIPE_STENCIL_OP_INVERT:     return BRW_STENCILOP_INVERT;
  232.    default:
  233.       assert(!"unknown stencil op");
  234.       return BRW_STENCILOP_KEEP;
  235.    }
  236. }
  237.  
  238. /**
  239.  * Translate a pipe texture mipfilter to the matching hardware mipfilter.
  240.  */
  241. static int
  242. gen6_translate_tex_mipfilter(unsigned filter)
  243. {
  244.    switch (filter) {
  245.    case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
  246.    case PIPE_TEX_MIPFILTER_LINEAR:  return BRW_MIPFILTER_LINEAR;
  247.    case PIPE_TEX_MIPFILTER_NONE:    return BRW_MIPFILTER_NONE;
  248.    default:
  249.       assert(!"unknown mipfilter");
  250.       return BRW_MIPFILTER_NONE;
  251.    }
  252. }
  253.  
  254. /**
  255.  * Translate a pipe texture filter to the matching hardware mapfilter.
  256.  */
  257. static int
  258. gen6_translate_tex_filter(unsigned filter)
  259. {
  260.    switch (filter) {
  261.    case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
  262.    case PIPE_TEX_FILTER_LINEAR:  return BRW_MAPFILTER_LINEAR;
  263.    default:
  264.       assert(!"unknown sampler filter");
  265.       return BRW_MAPFILTER_NEAREST;
  266.    }
  267. }
  268.  
  269. /**
  270.  * Translate a pipe texture coordinate wrapping mode to the matching hardware
  271.  * wrapping mode.
  272.  */
  273. static int
  274. gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
  275. {
  276.    /* clamp to edge or border? */
  277.    if (wrap == PIPE_TEX_WRAP_CLAMP) {
  278.       wrap = (clamp_to_edge) ?
  279.          PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
  280.    }
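   /*
    * (editor's note, for illustration) The caller decides clamp_to_edge;
    * typically it is true for nearest filtering, where legacy
    * PIPE_TEX_WRAP_CLAMP can never sample the border color and so behaves
    * like CLAMP_TO_EDGE, and false for linear filtering, where the border
    * color may still be blended in.
    */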
  281.  
  282.    switch (wrap) {
  283.    case PIPE_TEX_WRAP_REPEAT:             return BRW_TEXCOORDMODE_WRAP;
  284.    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:      return BRW_TEXCOORDMODE_CLAMP;
  285.    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:    return BRW_TEXCOORDMODE_CLAMP_BORDER;
  286.    case PIPE_TEX_WRAP_MIRROR_REPEAT:      return BRW_TEXCOORDMODE_MIRROR;
  287.    case PIPE_TEX_WRAP_CLAMP:
  288.    case PIPE_TEX_WRAP_MIRROR_CLAMP:
  289.    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
  290.    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
  291.    default:
  292.       assert(!"unknown sampler wrap mode");
  293.       return BRW_TEXCOORDMODE_WRAP;
  294.    }
  295. }
  296.  
  297. /**
  298.  * Translate a pipe DSA test function to the matching hardware compare
  299.  * function.
  300.  */
  301. static int
  302. gen6_translate_dsa_func(unsigned func)
  303. {
  304.    switch (func) {
  305.    case PIPE_FUNC_NEVER:      return BRW_COMPAREFUNCTION_NEVER;
  306.    case PIPE_FUNC_LESS:       return BRW_COMPAREFUNCTION_LESS;
  307.    case PIPE_FUNC_EQUAL:      return BRW_COMPAREFUNCTION_EQUAL;
  308.    case PIPE_FUNC_LEQUAL:     return BRW_COMPAREFUNCTION_LEQUAL;
  309.    case PIPE_FUNC_GREATER:    return BRW_COMPAREFUNCTION_GREATER;
  310.    case PIPE_FUNC_NOTEQUAL:   return BRW_COMPAREFUNCTION_NOTEQUAL;
  311.    case PIPE_FUNC_GEQUAL:     return BRW_COMPAREFUNCTION_GEQUAL;
  312.    case PIPE_FUNC_ALWAYS:     return BRW_COMPAREFUNCTION_ALWAYS;
  313.    default:
  314.       assert(!"unknown depth/stencil/alpha test function");
  315.       return BRW_COMPAREFUNCTION_NEVER;
  316.    }
  317. }
  318.  
  319. /**
  320.  * Translate a pipe shadow compare function to the matching hardware shadow
  321.  * function.
  322.  */
  323. static int
  324. gen6_translate_shadow_func(unsigned func)
  325. {
  326.    /*
  327.     * For PIPE_FUNC_x, the reference value is on the left-hand side of the
  328.     * comparison, and 1.0 is returned when the comparison is true.
  329.     *
  330.     * For BRW_PREFILTER_x, the reference value is on the right-hand side of
  331.     * the comparison, and 0.0 is returned when the comparison is true.
  332.     */
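   /*
    * (worked example, editorial) PIPE_FUNC_LESS means "ref < texel ? 1.0 :
    * 0.0".  The hardware computes "texel <op> ref ? 0.0 : 1.0", so we need
    * an <op> that is true exactly when "ref < texel" is false, i.e. when
    * "texel <= ref"; hence BRW_PREFILTER_LEQUAL below.
    */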
  333.    switch (func) {
  334.    case PIPE_FUNC_NEVER:      return BRW_PREFILTER_ALWAYS;
  335.    case PIPE_FUNC_LESS:       return BRW_PREFILTER_LEQUAL;
  336.    case PIPE_FUNC_EQUAL:      return BRW_PREFILTER_NOTEQUAL;
  337.    case PIPE_FUNC_LEQUAL:     return BRW_PREFILTER_LESS;
  338.    case PIPE_FUNC_GREATER:    return BRW_PREFILTER_GEQUAL;
  339.    case PIPE_FUNC_NOTEQUAL:   return BRW_PREFILTER_EQUAL;
  340.    case PIPE_FUNC_GEQUAL:     return BRW_PREFILTER_GREATER;
  341.    case PIPE_FUNC_ALWAYS:     return BRW_PREFILTER_NEVER;
  342.    default:
  343.       assert(!"unknown shadow compare function");
  344.       return BRW_PREFILTER_NEVER;
  345.    }
  346. }
  347.  
  348. /**
  349.  * Translate an index size to the matching hardware index format.
  350.  */
  351. static int
  352. gen6_translate_index_size(int size)
  353. {
  354.    switch (size) {
  355.    case 4: return BRW_INDEX_DWORD;
  356.    case 2: return BRW_INDEX_WORD;
  357.    case 1: return BRW_INDEX_BYTE;
  358.    default:
  359.       assert(!"unknown index size");
  360.       return BRW_INDEX_BYTE;
  361.    }
  362. }
  363.  
  364. static void
  365. gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
  366.                              struct intel_bo *general_state_bo,
  367.                              struct intel_bo *surface_state_bo,
  368.                              struct intel_bo *dynamic_state_bo,
  369.                              struct intel_bo *indirect_object_bo,
  370.                              struct intel_bo *instruction_bo,
  371.                              uint32_t general_state_size,
  372.                              uint32_t dynamic_state_size,
  373.                              uint32_t indirect_object_size,
  374.                              uint32_t instruction_size,
  375.                              struct ilo_cp *cp)
  376. {
  377.    const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
  378.    const uint8_t cmd_len = 10;
  379.  
  380.    ILO_GPE_VALID_GEN(dev, 6, 7);
  381.  
  382.    /* 4K-page aligned */
  383.    assert(((general_state_size | dynamic_state_size |
  384.             indirect_object_size | instruction_size) & 0xfff) == 0);
  385.  
  386.    ilo_cp_begin(cp, cmd_len);
  387.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  388.  
  389.    ilo_cp_write_bo(cp, 1, general_state_bo,
  390.                        INTEL_DOMAIN_RENDER,
  391.                        0);
  392.    ilo_cp_write_bo(cp, 1, surface_state_bo,
  393.                        INTEL_DOMAIN_SAMPLER,
  394.                        0);
  395.    ilo_cp_write_bo(cp, 1, dynamic_state_bo,
  396.                        INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
  397.                        0);
  398.    ilo_cp_write_bo(cp, 1, indirect_object_bo,
  399.                        0,
  400.                        0);
  401.    ilo_cp_write_bo(cp, 1, instruction_bo,
  402.                        INTEL_DOMAIN_INSTRUCTION,
  403.                        0);
  404.  
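   /*
    * (editor's note) Bit 0 of each address and upper-bound DWord appears to
    * be its "modify enable" bit; that is presumably why the size-derived
    * bounds below are OR'ed with 1, and why the "skip range check" paths
    * still write a 1 (or 0xfffff000 + 1) with that bit set.
    */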
  405.    if (general_state_size) {
  406.       ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
  407.                           INTEL_DOMAIN_RENDER,
  408.                           0);
  409.    }
  410.    else {
  411.       /* skip range check */
  412.       ilo_cp_write(cp, 1);
  413.    }
  414.  
  415.    if (dynamic_state_size) {
  416.       ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
  417.                           INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
  418.                           0);
  419.    }
  420.    else {
  421.       /* skip range check */
  422.       ilo_cp_write(cp, 0xfffff000 + 1);
  423.    }
  424.  
  425.    if (indirect_object_size) {
  426.       ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
  427.                           0,
  428.                           0);
  429.    }
  430.    else {
  431.       /* skip range check */
  432.       ilo_cp_write(cp, 0xfffff000 + 1);
  433.    }
  434.  
  435.    if (instruction_size) {
  436.       ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
  437.                           INTEL_DOMAIN_INSTRUCTION,
  438.                           0);
  439.    }
  440.    else {
  441.       /* skip range check */
  442.       ilo_cp_write(cp, 1);
  443.    }
  444.  
  445.    ilo_cp_end(cp);
  446. }
  447.  
  448. static void
  449. gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
  450.                     uint32_t sip,
  451.                     struct ilo_cp *cp)
  452. {
  453.    const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
  454.    const uint8_t cmd_len = 2;
  455.  
  456.    ILO_GPE_VALID_GEN(dev, 6, 7);
  457.  
  458.    ilo_cp_begin(cp, cmd_len);
  459.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  460.    ilo_cp_write(cp, sip);
  461.    ilo_cp_end(cp);
  462. }
  463.  
  464. static void
  465. gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
  466.                                 bool enable,
  467.                                 struct ilo_cp *cp)
  468. {
  469.    const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
  470.    const uint8_t cmd_len = 1;
  471.  
  472.    ILO_GPE_VALID_GEN(dev, 6, 7);
  473.  
  474.    ilo_cp_begin(cp, cmd_len);
  475.    ilo_cp_write(cp, cmd | enable);
  476.    ilo_cp_end(cp);
  477. }
  478.  
  479. static void
  480. gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
  481.                           int pipeline,
  482.                           struct ilo_cp *cp)
  483. {
  484.    const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
  485.    const uint8_t cmd_len = 1;
  486.  
  487.    ILO_GPE_VALID_GEN(dev, 6, 7);
  488.  
  489.    /* 3D or media */
  490.    assert(pipeline == 0x0 || pipeline == 0x1);
  491.  
  492.    ilo_cp_begin(cp, cmd_len);
  493.    ilo_cp_write(cp, cmd | pipeline);
  494.    ilo_cp_end(cp);
  495. }
  496.  
  497. static void
  498. gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
  499.                           int max_threads, int num_urb_entries,
  500.                           int urb_entry_size,
  501.                           struct ilo_cp *cp)
  502. {
  503.    const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
  504.    const uint8_t cmd_len = 8;
  505.    uint32_t dw2, dw4;
  506.  
  507.    ILO_GPE_VALID_GEN(dev, 6, 6);
  508.  
  509.    dw2 = (max_threads - 1) << 16 |
  510.          num_urb_entries << 8 |
  511.          1 << 7 | /* Reset Gateway Timer */
  512.          1 << 6;  /* Bypass Gateway Control */
  513.  
  514.    dw4 = urb_entry_size << 16 |  /* URB Entry Allocation Size */
  515.          480;                    /* CURBE Allocation Size */
  516.  
  517.    ilo_cp_begin(cp, cmd_len);
  518.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  519.    ilo_cp_write(cp, 0); /* scratch */
  520.    ilo_cp_write(cp, dw2);
  521.    ilo_cp_write(cp, 0); /* MBZ */
  522.    ilo_cp_write(cp, dw4);
  523.    ilo_cp_write(cp, 0); /* scoreboard */
  524.    ilo_cp_write(cp, 0);
  525.    ilo_cp_write(cp, 0);
  526.    ilo_cp_end(cp);
  527. }
  528.  
  529. static void
  530. gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
  531.                           uint32_t buf, int size,
  532.                           struct ilo_cp *cp)
  533. {
  534.    const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
  535.    const uint8_t cmd_len = 4;
  536.  
  537.    ILO_GPE_VALID_GEN(dev, 6, 6);
  538.  
  539.    assert(buf % 32 == 0);
  540.    /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
  541.    size = align(size, 32);
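   /* e.g. a 100-byte constant block is rounded up to 128 bytes (four 256-bit units) */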
  542.  
  543.    ilo_cp_begin(cp, cmd_len);
  544.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  545.    ilo_cp_write(cp, 0); /* MBZ */
  546.    ilo_cp_write(cp, size);
  547.    ilo_cp_write(cp, buf);
  548.    ilo_cp_end(cp);
  549. }
  550.  
  551. static void
  552. gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
  553.                                           uint32_t offset, int num_ids,
  554.                                           struct ilo_cp *cp)
  555. {
  556.    const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
  557.    const uint8_t cmd_len = 4;
  558.  
  559.    ILO_GPE_VALID_GEN(dev, 6, 6);
  560.  
  561.    assert(offset % 32 == 0);
  562.  
  563.    ilo_cp_begin(cp, cmd_len);
  564.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  565.    ilo_cp_write(cp, 0); /* MBZ */
  566.    /* every ID has 8 DWords */
  567.    ilo_cp_write(cp, num_ids * 8 * 4);
  568.    ilo_cp_write(cp, offset);
  569.    ilo_cp_end(cp);
  570. }
  571.  
  572. static void
  573. gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
  574.                               int id, int byte, int thread_count,
  575.                               struct ilo_cp *cp)
  576. {
  577.    const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
  578.    const uint8_t cmd_len = 2;
  579.    uint32_t dw1;
  580.  
  581.    ILO_GPE_VALID_GEN(dev, 6, 6);
  582.  
  583.    dw1 = id << 16 |
  584.          byte << 8 |
  585.          thread_count;
  586.  
  587.    ilo_cp_begin(cp, cmd_len);
  588.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  589.    ilo_cp_write(cp, dw1);
  590.    ilo_cp_end(cp);
  591. }
  592.  
  593. static void
  594. gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
  595.                             int thread_count_water_mark,
  596.                             int barrier_mask,
  597.                             struct ilo_cp *cp)
  598. {
  599.    const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
  600.    const uint8_t cmd_len = 2;
  601.    uint32_t dw1;
  602.  
  603.    ILO_GPE_VALID_GEN(dev, 6, 6);
  604.  
  605.    dw1 = thread_count_water_mark << 16 |
  606.          barrier_mask;
  607.  
  608.    ilo_cp_begin(cp, cmd_len);
  609.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  610.    ilo_cp_write(cp, dw1);
  611.    ilo_cp_end(cp);
  612. }
  613.  
  614. static void
  615. gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
  616.                               struct ilo_cp *cp)
  617. {
  618.    assert(!"MEDIA_OBJECT_WALKER unsupported");
  619. }
  620.  
  621. static void
  622. gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
  623.                                          uint32_t vs_binding_table,
  624.                                          uint32_t gs_binding_table,
  625.                                          uint32_t ps_binding_table,
  626.                                          struct ilo_cp *cp)
  627. {
  628.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
  629.    const uint8_t cmd_len = 4;
  630.  
  631.    ILO_GPE_VALID_GEN(dev, 6, 6);
  632.  
  633.    ilo_cp_begin(cp, cmd_len);
  634.    ilo_cp_write(cp, cmd | (cmd_len - 2) |
  635.                     GEN6_BINDING_TABLE_MODIFY_VS |
  636.                     GEN6_BINDING_TABLE_MODIFY_GS |
  637.                     GEN6_BINDING_TABLE_MODIFY_PS);
  638.    ilo_cp_write(cp, vs_binding_table);
  639.    ilo_cp_write(cp, gs_binding_table);
  640.    ilo_cp_write(cp, ps_binding_table);
  641.    ilo_cp_end(cp);
  642. }
  643.  
  644. static void
  645. gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
  646.                                          uint32_t vs_sampler_state,
  647.                                          uint32_t gs_sampler_state,
  648.                                          uint32_t ps_sampler_state,
  649.                                          struct ilo_cp *cp)
  650. {
  651.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
  652.    const uint8_t cmd_len = 4;
  653.  
  654.    ILO_GPE_VALID_GEN(dev, 6, 6);
  655.  
  656.    ilo_cp_begin(cp, cmd_len);
  657.    ilo_cp_write(cp, cmd | (cmd_len - 2) |
  658.                     VS_SAMPLER_STATE_CHANGE |
  659.                     GS_SAMPLER_STATE_CHANGE |
  660.                     PS_SAMPLER_STATE_CHANGE);
  661.    ilo_cp_write(cp, vs_sampler_state);
  662.    ilo_cp_write(cp, gs_sampler_state);
  663.    ilo_cp_write(cp, ps_sampler_state);
  664.    ilo_cp_end(cp);
  665. }
  666.  
  667. static void
  668. gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
  669.                       int vs_total_size, int gs_total_size,
  670.                       int vs_entry_size, int gs_entry_size,
  671.                       struct ilo_cp *cp)
  672. {
  673.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
  674.    const uint8_t cmd_len = 3;
  675.    const int row_size = 128; /* 1024 bits */
  676.    int vs_alloc_size, gs_alloc_size;
  677.    int vs_num_entries, gs_num_entries;
  678.  
  679.    ILO_GPE_VALID_GEN(dev, 6, 6);
  680.  
  681.    /* in 1024-bit URB rows */
  682.    vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
  683.    gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
  684.  
  685.    /* the valid range is [1, 5] */
  686.    if (!vs_alloc_size)
  687.       vs_alloc_size = 1;
  688.    if (!gs_alloc_size)
  689.       gs_alloc_size = 1;
  690.    assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
  691.  
  692.    /* the valid range is [24, 256] in multiples of 4 */
  693.    vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
  694.    if (vs_num_entries > 256)
  695.       vs_num_entries = 256;
  696.    assert(vs_num_entries >= 24);
  697.  
  698.    /* the valid range is [0, 256] in multiples of 4 */
  699.    gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
  700.    if (gs_num_entries > 256)
  701.       gs_num_entries = 256;
  702.  
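   /*
    * Worked example (hypothetical sizes): with vs_entry_size = 256 bytes,
    * vs_alloc_size = 2 rows; with vs_total_size = 32768 bytes that gives
    * 32768 / 128 / 2 = 128 entries, already a multiple of 4 and within
    * [24, 256].
    */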
  703.    ilo_cp_begin(cp, cmd_len);
  704.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  705.    ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
  706.                     vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
  707.    ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
  708.                     (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
  709.    ilo_cp_end(cp);
  710. }
  711.  
  712. static void
  713. gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
  714.                                  const struct pipe_vertex_buffer *vbuffers,
  715.                                  uint64_t vbuffer_mask,
  716.                                  const struct ilo_ve_state *ve,
  717.                                  struct ilo_cp *cp)
  718. {
  719.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
  720.    uint8_t cmd_len;
  721.    unsigned hw_idx;
  722.  
  723.    ILO_GPE_VALID_GEN(dev, 6, 7);
  724.  
  725.    /*
  726.     * From the Sandy Bridge PRM, volume 2 part 1, page 82:
  727.     *
  728.     *     "From 1 to 33 VBs can be specified..."
  729.     */
  730.    assert(vbuffer_mask <= (1UL << 33));
  731.  
  732.    if (!vbuffer_mask)
  733.       return;
  734.  
  735.    cmd_len = 1;
  736.  
  737.    for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
  738.       const unsigned pipe_idx = ve->vb_mapping[hw_idx];
  739.  
  740.       if (vbuffer_mask & (1 << pipe_idx))
  741.          cmd_len += 4;
  742.    }
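   /*
    * cmd_len is thus 1 header DWord plus 4 DWords per enabled buffer; e.g.
    * three enabled buffers give cmd_len = 13 and a length field of
    * cmd_len - 2 = 11.
    */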
  743.  
  744.    ilo_cp_begin(cp, cmd_len);
  745.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  746.  
  747.    for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
  748.       const unsigned instance_divisor = ve->instance_divisors[hw_idx];
  749.       const unsigned pipe_idx = ve->vb_mapping[hw_idx];
  750.       const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx];
  751.       uint32_t dw;
  752.  
  753.       if (!(vbuffer_mask & (1 << pipe_idx)))
  754.          continue;
  755.  
  756.       dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
  757.  
  758.       if (instance_divisor)
  759.          dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
  760.       else
  761.          dw |= GEN6_VB0_ACCESS_VERTEXDATA;
  762.  
  763.       if (dev->gen >= ILO_GEN(7))
  764.          dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
  765.  
  766.       /* use null vb if there is no buffer or the stride is out of range */
  767.       if (vb->buffer && vb->stride <= 2048) {
  768.          const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
  769.          const uint32_t start_offset = vb->buffer_offset;
  770.          /*
  771.           * As noted in ilo_translate_format(), we treat some 3-component
  772.           * formats as 4-component formats to work around hardware
  773.           * limitations.  Imagine the case where the vertex buffer holds a
  774.           * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
  775.           * The hardware would not be able to fetch it because the vertex
  776.           * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
  777.           * and that takes at least 8 bytes.
  778.           *
  779.           * For the workaround to work, we query the physical size, which is
  780.           * page aligned, to calculate end_offset so that the last vertex has
  781.           * a better chance to be fetched.
  782.           */
  783.          const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
  784.  
  785.          dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
  786.  
  787.          ilo_cp_write(cp, dw);
  788.          ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
  789.          ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
  790.          ilo_cp_write(cp, instance_divisor);
  791.       }
  792.       else {
  793.          dw |= 1 << 13;
  794.  
  795.          ilo_cp_write(cp, dw);
  796.          ilo_cp_write(cp, 0);
  797.          ilo_cp_write(cp, 0);
  798.          ilo_cp_write(cp, instance_divisor);
  799.       }
  800.    }
  801.  
  802.    ilo_cp_end(cp);
  803. }
  804.  
  805. static void
  806. ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
  807.                     struct ilo_ve_cso *cso)
  808. {
  809.    int format;
  810.  
  811.    ILO_GPE_VALID_GEN(dev, 6, 7);
  812.  
  813.    /*
  814.     * From the Sandy Bridge PRM, volume 2 part 1, page 94:
  815.     *
  816.     *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
  817.     *        valid VERTEX_ELEMENT structure.
  818.     *
  819.     *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
  820.     *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
  821.     *
  822.     *      - The Source Element Format must be set to the UINT format.
  823.     *
  824.     *      - [DevSNB]: Edge Flags are not supported for QUADLIST
  825.     *        primitives.  Software may elect to convert QUADLIST primitives
  826.     *        to some set of corresponding edge-flag-supported primitive
  827.     *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
  828.     */
  829.  
  830.    cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
  831.    cso->payload[1] =
  832.          BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
  833.          BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
  834.          BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
  835.          BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
  836.  
  837.    /*
  838.     * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
  839.     * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
  840.     * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
  841.     *
  842.     * Since all the hardware cares about is whether the flags are zero or not,
  843.     * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
  844.     */
  845.    format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
  846.    if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
  847.       STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
  848.             BRW_SURFACEFORMAT_R32_FLOAT - 1);
  849.  
  850.       cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
  851.    }
  852.    else {
  853.       assert(format == BRW_SURFACEFORMAT_R8_UINT);
  854.    }
  855. }
  856.  
  857. static void
  858. ve_init_cso_with_components(const struct ilo_dev_info *dev,
  859.                             int comp0, int comp1, int comp2, int comp3,
  860.                             struct ilo_ve_cso *cso)
  861. {
  862.    ILO_GPE_VALID_GEN(dev, 6, 7);
  863.  
  864.    STATIC_ASSERT(Elements(cso->payload) >= 2);
  865.    cso->payload[0] = GEN6_VE0_VALID;
  866.    cso->payload[1] =
  867.          comp0 << BRW_VE1_COMPONENT_0_SHIFT |
  868.          comp1 << BRW_VE1_COMPONENT_1_SHIFT |
  869.          comp2 << BRW_VE1_COMPONENT_2_SHIFT |
  870.          comp3 << BRW_VE1_COMPONENT_3_SHIFT;
  871. }
  872.  
  873. static void
  874. ve_init_cso(const struct ilo_dev_info *dev,
  875.             const struct pipe_vertex_element *state,
  876.             unsigned vb_index,
  877.             struct ilo_ve_cso *cso)
  878. {
  879.    int comp[4] = {
  880.       BRW_VE1_COMPONENT_STORE_SRC,
  881.       BRW_VE1_COMPONENT_STORE_SRC,
  882.       BRW_VE1_COMPONENT_STORE_SRC,
  883.       BRW_VE1_COMPONENT_STORE_SRC,
  884.    };
  885.    int format;
  886.  
  887.    ILO_GPE_VALID_GEN(dev, 6, 7);
  888.  
  889.    switch (util_format_get_nr_components(state->src_format)) {
  890.    case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0; /* fall through */
  891.    case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0; /* fall through */
  892.    case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
  893.                      BRW_VE1_COMPONENT_STORE_1_INT :
  894.                      BRW_VE1_COMPONENT_STORE_1_FLT;
  895.    }
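   /*
    * e.g. a two-component format keeps comp[0] and comp[1] as STORE_SRC,
    * and the fallthrough fills comp[2] = 0 and comp[3] = 1, giving the
    * usual (x, y, 0, 1) expansion.
    */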
  896.  
  897.    format = ilo_translate_vertex_format(state->src_format);
  898.  
  899.    STATIC_ASSERT(Elements(cso->payload) >= 2);
  900.    cso->payload[0] =
  901.       vb_index << GEN6_VE0_INDEX_SHIFT |
  902.       GEN6_VE0_VALID |
  903.       format << BRW_VE0_FORMAT_SHIFT |
  904.       state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
  905.  
  906.    cso->payload[1] =
  907.          comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
  908.          comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
  909.          comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
  910.          comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
  911. }
  912.  
  913. void
  914. ilo_gpe_init_ve(const struct ilo_dev_info *dev,
  915.                 unsigned num_states,
  916.                 const struct pipe_vertex_element *states,
  917.                 struct ilo_ve_state *ve)
  918. {
  919.    unsigned i;
  920.  
  921.    ILO_GPE_VALID_GEN(dev, 6, 7);
  922.  
  923.    ve->count = num_states;
  924.    ve->vb_count = 0;
  925.  
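   /*
    * (editor's note) Each (vertex_buffer_index, instance_divisor) pair gets
    * its own hardware VB below; e.g. two elements that read pipe VB 0 with
    * divisors 0 and 1 end up in two separate hardware VBs.
    */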
  926.    for (i = 0; i < num_states; i++) {
  927.       const unsigned pipe_idx = states[i].vertex_buffer_index;
  928.       const unsigned instance_divisor = states[i].instance_divisor;
  929.       unsigned hw_idx;
  930.  
  931.       /*
  932.        * map the pipe vb to the hardware vb, which has a fixed instance
  933.        * divisor
  934.        */
  935.       for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
  936.          if (ve->vb_mapping[hw_idx] == pipe_idx &&
  937.              ve->instance_divisors[hw_idx] == instance_divisor)
  938.             break;
  939.       }
  940.  
  941.       /* create one if there is no matching hardware vb */
  942.       if (hw_idx >= ve->vb_count) {
  943.          hw_idx = ve->vb_count++;
  944.  
  945.          ve->vb_mapping[hw_idx] = pipe_idx;
  946.          ve->instance_divisors[hw_idx] = instance_divisor;
  947.       }
  948.  
  949.       ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
  950.    }
  951. }
  952.  
  953. static void
  954. gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
  955.                                   const struct ilo_ve_state *ve,
  956.                                   bool last_velement_edgeflag,
  957.                                   bool prepend_generated_ids,
  958.                                   struct ilo_cp *cp)
  959. {
  960.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
  961.    uint8_t cmd_len;
  962.    unsigned i;
  963.  
  964.    ILO_GPE_VALID_GEN(dev, 6, 7);
  965.  
  966.    /*
  967.     * From the Sandy Bridge PRM, volume 2 part 1, page 93:
  968.     *
  969.     *     "Up to 34 (DevSNB+) vertex elements are supported."
  970.     */
  971.    assert(ve->count + prepend_generated_ids <= 34);
  972.  
  973.    if (!ve->count && !prepend_generated_ids) {
  974.       struct ilo_ve_cso dummy;
  975.  
  976.       ve_init_cso_with_components(dev,
  977.             BRW_VE1_COMPONENT_STORE_0,
  978.             BRW_VE1_COMPONENT_STORE_0,
  979.             BRW_VE1_COMPONENT_STORE_0,
  980.             BRW_VE1_COMPONENT_STORE_1_FLT,
  981.             &dummy);
  982.  
  983.       cmd_len = 3;
  984.       ilo_cp_begin(cp, cmd_len);
  985.       ilo_cp_write(cp, cmd | (cmd_len - 2));
  986.       ilo_cp_write_multi(cp, dummy.payload, 2);
  987.       ilo_cp_end(cp);
  988.  
  989.       return;
  990.    }
  991.  
  992.    cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
  993.  
  994.    ilo_cp_begin(cp, cmd_len);
  995.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  996.  
  997.    if (prepend_generated_ids) {
  998.       struct ilo_ve_cso gen_ids;
  999.  
  1000.       ve_init_cso_with_components(dev,
  1001.             BRW_VE1_COMPONENT_STORE_VID,
  1002.             BRW_VE1_COMPONENT_STORE_IID,
  1003.             BRW_VE1_COMPONENT_NOSTORE,
  1004.             BRW_VE1_COMPONENT_NOSTORE,
  1005.             &gen_ids);
  1006.  
  1007.       ilo_cp_write_multi(cp, gen_ids.payload, 2);
  1008.    }
  1009.  
  1010.    if (last_velement_edgeflag) {
  1011.       struct ilo_ve_cso edgeflag;
  1012.  
  1013.       for (i = 0; i < ve->count - 1; i++)
  1014.          ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
  1015.  
  1016.       edgeflag = ve->cso[i];
  1017.       ve_set_cso_edgeflag(dev, &edgeflag);
  1018.       ilo_cp_write_multi(cp, edgeflag.payload, 2);
  1019.    }
  1020.    else {
  1021.       for (i = 0; i < ve->count; i++)
  1022.          ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
  1023.    }
  1024.  
  1025.    ilo_cp_end(cp);
  1026. }
  1027.  
  1028. static void
  1029. gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
  1030.                                const struct ilo_ib_state *ib,
  1031.                                bool enable_cut_index,
  1032.                                struct ilo_cp *cp)
  1033. {
  1034.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
  1035.    const uint8_t cmd_len = 3;
  1036.    struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
  1037.    uint32_t start_offset, end_offset;
  1038.    int format;
  1039.  
  1040.    ILO_GPE_VALID_GEN(dev, 6, 7);
  1041.  
  1042.    if (!buf)
  1043.       return;
  1044.  
  1045.    format = gen6_translate_index_size(ib->hw_index_size);
  1046.  
  1047.    /*
  1048.     * set start_offset to 0 here and adjust pipe_draw_info::start with
  1049.     * ib->draw_start_offset in 3DPRIMITIVE
  1050.     */
  1051.    start_offset = 0;
  1052.    end_offset = buf->bo_size;
  1053.  
  1054.    /* end_offset must also be aligned and is inclusive */
  1055.    end_offset -= (end_offset % ib->hw_index_size);
  1056.    end_offset--;
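   /*
    * e.g. a 1001-byte buffer with 4-byte indices: 1001 - (1001 % 4) = 1000,
    * minus one for the inclusive bound gives end_offset = 999.
    */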
  1057.  
  1058.    ilo_cp_begin(cp, cmd_len);
  1059.    ilo_cp_write(cp, cmd | (cmd_len - 2) |
  1060.                     ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
  1061.                     format << 8);
  1062.    ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
  1063.    ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
  1064.    ilo_cp_end(cp);
  1065. }
  1066.  
  1067. static void
  1068. gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
  1069.                                           uint32_t clip_viewport,
  1070.                                           uint32_t sf_viewport,
  1071.                                           uint32_t cc_viewport,
  1072.                                           struct ilo_cp *cp)
  1073. {
  1074.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
  1075.    const uint8_t cmd_len = 4;
  1076.  
  1077.    ILO_GPE_VALID_GEN(dev, 6, 6);
  1078.  
  1079.    ilo_cp_begin(cp, cmd_len);
  1080.    ilo_cp_write(cp, cmd | (cmd_len - 2) |
  1081.                     GEN6_CLIP_VIEWPORT_MODIFY |
  1082.                     GEN6_SF_VIEWPORT_MODIFY |
  1083.                     GEN6_CC_VIEWPORT_MODIFY);
  1084.    ilo_cp_write(cp, clip_viewport);
  1085.    ilo_cp_write(cp, sf_viewport);
  1086.    ilo_cp_write(cp, cc_viewport);
  1087.    ilo_cp_end(cp);
  1088. }
  1089.  
  1090. static void
  1091. gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
  1092.                                     uint32_t blend_state,
  1093.                                     uint32_t depth_stencil_state,
  1094.                                     uint32_t color_calc_state,
  1095.                                     struct ilo_cp *cp)
  1096. {
  1097.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
  1098.    const uint8_t cmd_len = 4;
  1099.  
  1100.    ILO_GPE_VALID_GEN(dev, 6, 6);
  1101.  
  1102.    ilo_cp_begin(cp, cmd_len);
  1103.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1104.    ilo_cp_write(cp, blend_state | 1);
  1105.    ilo_cp_write(cp, depth_stencil_state | 1);
  1106.    ilo_cp_write(cp, color_calc_state | 1);
  1107.    ilo_cp_end(cp);
  1108. }
  1109.  
  1110. static void
  1111. gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
  1112.                                          uint32_t scissor_rect,
  1113.                                          struct ilo_cp *cp)
  1114. {
  1115.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
  1116.    const uint8_t cmd_len = 2;
  1117.  
  1118.    ILO_GPE_VALID_GEN(dev, 6, 7);
  1119.  
  1120.    ilo_cp_begin(cp, cmd_len);
  1121.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1122.    ilo_cp_write(cp, scissor_rect);
  1123.    ilo_cp_end(cp);
  1124. }
  1125.  
  1126. void
  1127. ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
  1128.                     const struct ilo_shader_state *vs,
  1129.                     struct ilo_shader_cso *cso)
  1130. {
  1131.    int start_grf, vue_read_len, max_threads;
  1132.    uint32_t dw2, dw4, dw5;
  1133.  
  1134.    ILO_GPE_VALID_GEN(dev, 6, 7);
  1135.  
  1136.    start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
  1137.    vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
  1138.  
  1139.    /*
  1140.     * From the Sandy Bridge PRM, volume 2 part 1, page 135:
  1141.     *
  1142.     *     "(Vertex URB Entry Read Length) Specifies the number of pairs of
  1143.     *      128-bit vertex elements to be passed into the payload for each
  1144.     *      vertex."
  1145.     *
  1146.     *     "It is UNDEFINED to set this field to 0 indicating no Vertex URB
  1147.     *      data to be read and passed to the thread."
  1148.     */
  1149.    vue_read_len = (vue_read_len + 1) / 2;
  1150.    if (!vue_read_len)
  1151.       vue_read_len = 1;
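   /* e.g. 5 input attributes give a read length of (5 + 1) / 2 = 3 pairs of 128-bit elements */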
  1152.  
  1153.    switch (dev->gen) {
  1154.    case ILO_GEN(6):
  1155.       /*
  1156.        * From the Sandy Bridge PRM, volume 1 part 1, page 22:
  1157.        *
  1158.        *     "Device             # of EUs        #Threads/EU
  1159.        *      SNB GT2            12              5
  1160.        *      SNB GT1            6               4"
  1161.        */
  1162.       max_threads = (dev->gt == 2) ? 60 : 24;
  1163.       break;
  1164.    case ILO_GEN(7):
  1165.       /*
  1166.        * From the Ivy Bridge PRM, volume 1 part 1, page 18:
  1167.        *
  1168.        *     "Device             # of EUs        #Threads/EU
  1169.        *      Ivy Bridge (GT2)   16              8
  1170.        *      Ivy Bridge (GT1)   6               6"
  1171.        */
  1172.       max_threads = (dev->gt == 2) ? 128 : 36;
  1173.       break;
  1174.    case ILO_GEN(7.5):
  1175.       /* see brwCreateContext() */
  1176.       max_threads = (dev->gt == 2) ? 280 : 70;
  1177.       break;
  1178.    default:
  1179.       max_threads = 1;
  1180.       break;
  1181.    }
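   /*
    * (editorial) max_threads above is simply #EUs * threads/EU from the
    * quoted tables: 12 * 5 = 60 and 6 * 4 = 24 on SNB, 16 * 8 = 128 and
    * 6 * 6 = 36 on IVB.
    */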
  1182.  
  1183.    dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT;
  1184.  
  1185.    dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
  1186.          vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
  1187.          0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
  1188.  
  1189.    dw5 = GEN6_VS_STATISTICS_ENABLE |
  1190.          GEN6_VS_ENABLE;
  1191.  
  1192.    if (dev->gen >= ILO_GEN(7.5))
  1193.       dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
  1194.    else
  1195.       dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
  1196.  
  1197.    STATIC_ASSERT(Elements(cso->payload) >= 3);
  1198.    cso->payload[0] = dw2;
  1199.    cso->payload[1] = dw4;
  1200.    cso->payload[2] = dw5;
  1201. }
  1202.  
  1203. static void
  1204. gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
  1205.                      const struct ilo_shader_state *vs,
  1206.                      int num_samplers,
  1207.                      struct ilo_cp *cp)
  1208. {
  1209.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
  1210.    const uint8_t cmd_len = 6;
  1211.    const struct ilo_shader_cso *cso;
  1212.    uint32_t dw2, dw4, dw5;
  1213.  
  1214.    ILO_GPE_VALID_GEN(dev, 6, 7);
  1215.  
  1216.    if (!vs) {
  1217.       ilo_cp_begin(cp, cmd_len);
  1218.       ilo_cp_write(cp, cmd | (cmd_len - 2));
  1219.       ilo_cp_write(cp, 0);
  1220.       ilo_cp_write(cp, 0);
  1221.       ilo_cp_write(cp, 0);
  1222.       ilo_cp_write(cp, 0);
  1223.       ilo_cp_write(cp, 0);
  1224.       ilo_cp_end(cp);
  1225.       return;
  1226.    }
  1227.  
  1228.    cso = ilo_shader_get_kernel_cso(vs);
  1229.    dw2 = cso->payload[0];
  1230.    dw4 = cso->payload[1];
  1231.    dw5 = cso->payload[2];
  1232.  
  1233.    dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
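   /* the sampler count field presumably counts groups of four, e.g. 5 samplers encode as (5 + 3) / 4 = 2 */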
  1234.  
  1235.    ilo_cp_begin(cp, cmd_len);
  1236.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1237.    ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
  1238.    ilo_cp_write(cp, dw2);
  1239.    ilo_cp_write(cp, 0); /* scratch */
  1240.    ilo_cp_write(cp, dw4);
  1241.    ilo_cp_write(cp, dw5);
  1242.    ilo_cp_end(cp);
  1243. }
  1244.  
  1245. void
  1246. ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
  1247.                          const struct ilo_shader_state *gs,
  1248.                          struct ilo_shader_cso *cso)
  1249. {
  1250.    int start_grf, vue_read_len, max_threads;
  1251.    uint32_t dw2, dw4, dw5, dw6;
  1252.  
  1253.    ILO_GPE_VALID_GEN(dev, 6, 6);
  1254.  
  1255.    if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
  1256.       start_grf = ilo_shader_get_kernel_param(gs,
  1257.             ILO_KERNEL_URB_DATA_START_REG);
  1258.  
  1259.       vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
  1260.    }
  1261.    else {
  1262.       start_grf = ilo_shader_get_kernel_param(gs,
  1263.             ILO_KERNEL_VS_GEN6_SO_START_REG);
  1264.  
  1265.       vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
  1266.    }
  1267.  
  1268.    /*
  1269.     * From the Sandy Bridge PRM, volume 2 part 1, page 153:
  1270.     *
  1271.     *     "Specifies the amount of URB data read and passed in the thread
  1272.     *      payload for each Vertex URB entry, in 256-bit register increments.
  1273.     *
  1274.     *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
  1275.     *      0 indicating no Vertex URB data to be read and passed to the
  1276.     *      thread."
  1277.     */
  1278.    vue_read_len = (vue_read_len + 1) / 2;
  1279.    if (!vue_read_len)
  1280.       vue_read_len = 1;
  1281.  
  1282.    /*
  1283.     * From the Sandy Bridge PRM, volume 2 part 1, page 154:
  1284.     *
  1285.     *     "Maximum Number of Threads valid range is [0,27] when Rendering
  1286.     *      Enabled bit is set."
  1287.     *
  1288.     * From the Sandy Bridge PRM, volume 2 part 1, page 173:
  1289.     *
  1290.     *     "Programming Note: If the GS stage is enabled, software must always
  1291.     *      allocate at least one GS URB Entry. This is true even if the GS
  1292.     *      thread never needs to output vertices to the pipeline, e.g., when
  1293.     *      only performing stream output. This is an artifact of the need to
  1294.     *      pass the GS thread an initial destination URB handle."
  1295.     *
  1296.     * As such, we always enable rendering, and limit the number of threads.
  1297.     */
  1298.    if (dev->gt == 2) {
  1299.       /* maximum is 60, but limited to 28 */
  1300.       max_threads = 28;
  1301.    }
  1302.    else {
  1303.       /* maximum is 24, but limited to 21 (see brwCreateContext()) */
  1304.       max_threads = 21;
  1305.    }
  1306.  
  1307.    dw2 = GEN6_GS_SPF_MODE;
  1308.  
  1309.    dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
  1310.          0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
  1311.          start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
  1312.  
  1313.    dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
  1314.          GEN6_GS_STATISTICS_ENABLE |
  1315.          GEN6_GS_SO_STATISTICS_ENABLE |
  1316.          GEN6_GS_RENDERING_ENABLE;
  1317.  
  1318.    /*
  1319.     * we cannot make use of GEN6_GS_REORDER because it will reorder
  1320.     * triangle strips according to D3D rules (triangle 2N+1 uses vertices
  1321.     * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
  1322.     * (2N+2, 2N+1, 2N+3)).
  1323.     */
  1324.    dw6 = GEN6_GS_ENABLE;
  1325.  
  1326.    if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
  1327.       dw6 |= GEN6_GS_DISCARD_ADJACENCY;
  1328.  
  1329.    if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
  1330.       const uint32_t svbi_post_inc =
  1331.          ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
  1332.  
  1333.       dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
  1334.       if (svbi_post_inc) {
  1335.          dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
  1336.                 svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
  1337.       }
  1338.    }
  1339.  
  1340.    STATIC_ASSERT(Elements(cso->payload) >= 4);
  1341.    cso->payload[0] = dw2;
  1342.    cso->payload[1] = dw4;
  1343.    cso->payload[2] = dw5;
  1344.    cso->payload[3] = dw6;
  1345. }
  1346.  
  1347. static void
  1348. gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
  1349.                      const struct ilo_shader_state *gs,
  1350.                      const struct ilo_shader_state *vs,
  1351.                      int verts_per_prim,
  1352.                      struct ilo_cp *cp)
  1353. {
  1354.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
  1355.    const uint8_t cmd_len = 7;
  1356.    uint32_t dw1, dw2, dw4, dw5, dw6;
  1357.  
  1358.    ILO_GPE_VALID_GEN(dev, 6, 6);
  1359.  
  1360.    if (gs) {
  1361.       const struct ilo_shader_cso *cso;
  1362.  
  1363.       dw1 = ilo_shader_get_kernel_offset(gs);
  1364.  
  1365.       cso = ilo_shader_get_kernel_cso(gs);
  1366.       dw2 = cso->payload[0];
  1367.       dw4 = cso->payload[1];
  1368.       dw5 = cso->payload[2];
  1369.       dw6 = cso->payload[3];
  1370.    }
  1371.    else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
  1372.       struct ilo_shader_cso cso;
  1373.       enum ilo_kernel_param param;
  1374.  
  1375.       switch (verts_per_prim) {
  1376.       case 1:
  1377.          param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
  1378.          break;
  1379.       case 2:
  1380.          param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
  1381.          break;
  1382.       default:
  1383.          param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
  1384.          break;
  1385.       }
  1386.  
  1387.       dw1 = ilo_shader_get_kernel_offset(vs) +
  1388.          ilo_shader_get_kernel_param(vs, param);
  1389.  
  1390.       /* cannot use VS's CSO */
  1391.       ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
  1392.       dw2 = cso.payload[0];
  1393.       dw4 = cso.payload[1];
  1394.       dw5 = cso.payload[2];
  1395.       dw6 = cso.payload[3];
  1396.    }
  1397.    else {
  1398.       dw1 = 0;
  1399.       dw2 = 0;
  1400.       dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
  1401.       dw5 = GEN6_GS_STATISTICS_ENABLE;
  1402.       dw6 = 0;
  1403.    }
  1404.  
  1405.    ilo_cp_begin(cp, cmd_len);
  1406.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1407.    ilo_cp_write(cp, dw1);
  1408.    ilo_cp_write(cp, dw2);
  1409.    ilo_cp_write(cp, 0);
  1410.    ilo_cp_write(cp, dw4);
  1411.    ilo_cp_write(cp, dw5);
  1412.    ilo_cp_write(cp, dw6);
  1413.    ilo_cp_end(cp);
  1414. }
  1415.  
  1416. void
  1417. ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
  1418.                              const struct pipe_rasterizer_state *state,
  1419.                              struct ilo_rasterizer_clip *clip)
  1420. {
  1421.    uint32_t dw1, dw2, dw3;
  1422.  
  1423.    ILO_GPE_VALID_GEN(dev, 6, 7);
  1424.  
  1425.    dw1 = GEN6_CLIP_STATISTICS_ENABLE;
  1426.  
  1427.    if (dev->gen >= ILO_GEN(7)) {
  1428.       /*
  1429.        * From the Ivy Bridge PRM, volume 2 part 1, page 219:
  1430.        *
  1431.        *     "Workaround : Due to Hardware issue "EarlyCull" needs to be
  1432.        *      enabled only for the cases where the incoming primitive topology
  1433.        *      into the clipper guaranteed to be Trilist."
  1434.        *
  1435.        * What does this mean?
  1436.        */
  1437.       dw1 |= 0 << 19 |
  1438.              GEN7_CLIP_EARLY_CULL;
  1439.  
  1440.       if (state->front_ccw)
  1441.          dw1 |= GEN7_CLIP_WINDING_CCW;
  1442.  
  1443.       switch (state->cull_face) {
  1444.       case PIPE_FACE_NONE:
  1445.          dw1 |= GEN7_CLIP_CULLMODE_NONE;
  1446.          break;
  1447.       case PIPE_FACE_FRONT:
  1448.          dw1 |= GEN7_CLIP_CULLMODE_FRONT;
  1449.          break;
  1450.       case PIPE_FACE_BACK:
  1451.          dw1 |= GEN7_CLIP_CULLMODE_BACK;
  1452.          break;
  1453.       case PIPE_FACE_FRONT_AND_BACK:
  1454.          dw1 |= GEN7_CLIP_CULLMODE_BOTH;
  1455.          break;
  1456.       }
  1457.    }
  1458.  
  1459.    dw2 = GEN6_CLIP_ENABLE |
  1460.          GEN6_CLIP_XY_TEST |
  1461.          state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
  1462.          GEN6_CLIP_MODE_NORMAL;
  1463.  
  1464.    if (state->clip_halfz)
  1465.       dw2 |= GEN6_CLIP_API_D3D;
  1466.    else
  1467.       dw2 |= GEN6_CLIP_API_OGL;
  1468.  
  1469.    if (state->depth_clip)
  1470.       dw2 |= GEN6_CLIP_Z_TEST;
  1471.  
  1472.    if (state->flatshade_first) {
  1473.       dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
  1474.              0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
  1475.              1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
  1476.    }
  1477.    else {
  1478.       dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
  1479.              1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
  1480.              2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
  1481.    }
  1482.  
  1483.    dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
  1484.          0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
  1485.  
  1486.    clip->payload[0] = dw1;
  1487.    clip->payload[1] = dw2;
  1488.    clip->payload[2] = dw3;
  1489.  
  1490.    clip->can_enable_guardband = true;
  1491.  
  1492.    /*
  1493.     * There are several reasons that guard band test should be disabled
  1494.     *
  1495.     *  - GL wide points (to avoid partially visible objects)
  1496.     *  - GL wide or AA lines (to avoid partially visible objects)
  1497.     */
  1498.    if (state->point_size_per_vertex || state->point_size > 1.0f)
  1499.       clip->can_enable_guardband = false;
  1500.    if (state->line_smooth || state->line_width > 1.0f)
  1501.       clip->can_enable_guardband = false;
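   /*
    * Illustration only (hypothetical state): point_size = 4.0f with
    * point_size_per_vertex disabled trips the first test above, so
    * can_enable_guardband stays false and gen6_emit_3DSTATE_CLIP() will
    * not set GEN6_CLIP_GB_TEST for this rasterizer state.
    */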
  1502. }
  1503.  
  1504. static void
  1505. gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
  1506.                        const struct ilo_rasterizer_state *rasterizer,
  1507.                        const struct ilo_shader_state *fs,
  1508.                        bool enable_guardband,
  1509.                        int num_viewports,
  1510.                        struct ilo_cp *cp)
  1511. {
  1512.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
  1513.    const uint8_t cmd_len = 4;
  1514.    uint32_t dw1, dw2, dw3;
  1515.  
  1516.    if (rasterizer) {
  1517.       int interps;
  1518.  
  1519.       dw1 = rasterizer->clip.payload[0];
  1520.       dw2 = rasterizer->clip.payload[1];
  1521.       dw3 = rasterizer->clip.payload[2];
  1522.  
  1523.       if (enable_guardband && rasterizer->clip.can_enable_guardband)
  1524.          dw2 |= GEN6_CLIP_GB_TEST;
  1525.  
  1526.       interps = (fs) ?  ilo_shader_get_kernel_param(fs,
  1527.             ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
  1528.  
  1529.       if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
  1530.                      1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
  1531.                      1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
  1532.          dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
  1533.  
  1534.       dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
  1535.              (num_viewports - 1);
  1536.    }
  1537.    else {
  1538.       dw1 = 0;
  1539.       dw2 = 0;
  1540.       dw3 = 0;
  1541.    }
  1542.  
  1543.    ilo_cp_begin(cp, cmd_len);
  1544.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1545.    ilo_cp_write(cp, dw1);
  1546.    ilo_cp_write(cp, dw2);
  1547.    ilo_cp_write(cp, dw3);
  1548.    ilo_cp_end(cp);
  1549. }
  1550.  
  1551. void
  1552. ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
  1553.                            const struct pipe_rasterizer_state *state,
  1554.                            struct ilo_rasterizer_sf *sf)
  1555. {
  1556.    float offset_const, offset_scale, offset_clamp;
  1557.    int line_width, point_width;
  1558.    uint32_t dw1, dw2, dw3;
  1559.  
  1560.    ILO_GPE_VALID_GEN(dev, 6, 7);
  1561.  
  1562.    /*
  1563.     * Scale the constant term.  The minimum representable value used by the HW
  1564.     * is not large enough to be the minimum resolvable difference.
  1565.     */
  1566.    offset_const = state->offset_units * 2.0f;
  1567.  
  1568.    offset_scale = state->offset_scale;
  1569.    offset_clamp = state->offset_clamp;
  1570.  
  1571.    /*
  1572.     * From the Sandy Bridge PRM, volume 2 part 1, page 248:
  1573.     *
  1574.     *     "This bit (Statistics Enable) should be set whenever clipping is
  1575.     *      enabled and the Statistics Enable bit is set in CLIP_STATE. It
  1576.     *      should be cleared if clipping is disabled or Statistics Enable in
  1577.     *      CLIP_STATE is clear."
  1578.     */
  1579.    dw1 = GEN6_SF_STATISTICS_ENABLE |
  1580.          GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
  1581.  
  1582.    /* XXX GEN6 path seems to work fine for GEN7 */
  1583.    if (false && dev->gen >= ILO_GEN(7)) {
  1584.       /*
  1585.        * From the Ivy Bridge PRM, volume 2 part 1, page 258:
  1586.        *
  1587.        *     "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
  1588.        *      Enable Solid , Global Depth Offset Enable Wireframe, and Global
  1589.        *      Depth Offset Enable Point) should be set whenever non zero depth
  1590.        *      bias (Slope, Bias) values are used. Setting this bit may have
  1591.        *      some degradation of performance for some workloads."
  1592.        */
  1593.       if (state->offset_tri || state->offset_line || state->offset_point) {
  1594.          /* XXX need to scale offset_const according to the depth format */
  1595.          dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
  1596.  
  1597.          dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
  1598.                 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
  1599.                 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
  1600.       }
  1601.       else {
  1602.          offset_const = 0.0f;
  1603.          offset_scale = 0.0f;
  1604.          offset_clamp = 0.0f;
  1605.       }
  1606.    }
  1607.    else {
  1608.       if (state->offset_tri)
  1609.          dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
  1610.       if (state->offset_line)
  1611.          dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
  1612.       if (state->offset_point)
  1613.          dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
  1614.    }
  1615.  
  1616.    switch (state->fill_front) {
  1617.    case PIPE_POLYGON_MODE_FILL:
  1618.       dw1 |= GEN6_SF_FRONT_SOLID;
  1619.       break;
  1620.    case PIPE_POLYGON_MODE_LINE:
  1621.       dw1 |= GEN6_SF_FRONT_WIREFRAME;
  1622.       break;
  1623.    case PIPE_POLYGON_MODE_POINT:
  1624.       dw1 |= GEN6_SF_FRONT_POINT;
  1625.       break;
  1626.    }
  1627.  
  1628.    switch (state->fill_back) {
  1629.    case PIPE_POLYGON_MODE_FILL:
  1630.       dw1 |= GEN6_SF_BACK_SOLID;
  1631.       break;
  1632.    case PIPE_POLYGON_MODE_LINE:
  1633.       dw1 |= GEN6_SF_BACK_WIREFRAME;
  1634.       break;
  1635.    case PIPE_POLYGON_MODE_POINT:
  1636.       dw1 |= GEN6_SF_BACK_POINT;
  1637.       break;
  1638.    }
  1639.  
  1640.    if (state->front_ccw)
  1641.       dw1 |= GEN6_SF_WINDING_CCW;
  1642.  
  1643.    dw2 = 0;
  1644.  
  1645.    if (state->line_smooth) {
  1646.       /*
  1647.        * From the Sandy Bridge PRM, volume 2 part 1, page 251:
  1648.        *
  1649.        *     "This field (Anti-aliasing Enable) must be disabled if any of the
  1650.        *      render targets have integer (UINT or SINT) surface format."
  1651.        *
  1652.        * From the Sandy Bridge PRM, volume 2 part 1, page 317:
  1653.        *
  1654.        *     "This field (Hierarchical Depth Buffer Enable) must be disabled
  1655.     *      if Anti-aliasing Enable in 3DSTATE_SF is enabled."
  1656.        *
  1657.        * TODO We do not check those yet.
  1658.        */
  1659.       dw2 |= GEN6_SF_LINE_AA_ENABLE |
  1660.              GEN6_SF_LINE_END_CAP_WIDTH_1_0;
  1661.    }
  1662.  
  1663.    switch (state->cull_face) {
  1664.    case PIPE_FACE_NONE:
  1665.       dw2 |= GEN6_SF_CULL_NONE;
  1666.       break;
  1667.    case PIPE_FACE_FRONT:
  1668.       dw2 |= GEN6_SF_CULL_FRONT;
  1669.       break;
  1670.    case PIPE_FACE_BACK:
  1671.       dw2 |= GEN6_SF_CULL_BACK;
  1672.       break;
  1673.    case PIPE_FACE_FRONT_AND_BACK:
  1674.       dw2 |= GEN6_SF_CULL_BOTH;
  1675.       break;
  1676.    }
  1677.  
  1678.    /*
  1679.     * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
  1680.     * pixels in the minor direction.  We have to make the lines slightly
  1681.     * thicker, 0.5 pixel on both sides, so that the expected number of
  1682.     * pixels is actually covered by the lines.
  1683.     *
  1684.     * Line width is in U3.7.
  1685.     */
  1686.    line_width = (int) ((state->line_width +
  1687.             (float) state->line_smooth) * 128.0f + 0.5f);
  1688.    line_width = CLAMP(line_width, 0, 1023);
  1689.  
  1690.    if (line_width == 128 && !state->line_smooth) {
  1691.       /* use GIQ rules */
  1692.       line_width = 0;
  1693.    }
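   /*
    * Worked example (illustration only): a non-smooth line_width of 1.5f
    * becomes (int) (1.5f * 128.0f + 0.5f) = 192 in U3.7, i.e. 1.5 pixels.
    * With line_smooth set, the extra 1.0f widens it to 320 (2.5 pixels),
    * matching the half-pixel padding on each side described above.
    */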
  1694.  
  1695.    dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
  1696.  
  1697.    if (state->scissor)
  1698.       dw2 |= GEN6_SF_SCISSOR_ENABLE;
  1699.  
  1700.    dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
  1701.          GEN6_SF_VERTEX_SUBPIXEL_8BITS;
  1702.  
  1703.    if (state->line_last_pixel)
  1704.       dw3 |= 1 << 31;
  1705.  
  1706.    if (state->flatshade_first) {
  1707.       dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
  1708.              0 << GEN6_SF_LINE_PROVOKE_SHIFT |
  1709.              1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
  1710.    }
  1711.    else {
  1712.       dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
  1713.              1 << GEN6_SF_LINE_PROVOKE_SHIFT |
  1714.              2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
  1715.    }
  1716.  
  1717.    if (!state->point_size_per_vertex)
  1718.       dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
  1719.  
  1720.    /* in U8.3 */
  1721.    point_width = (int) (state->point_size * 8.0f + 0.5f);
  1722.    point_width = CLAMP(point_width, 1, 2047);
  1723.  
  1724.    dw3 |= point_width;
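   /*
    * Worked example (illustration only): point_size = 2.5f encodes as
    * (int) (2.5f * 8.0f + 0.5f) = 20 in U8.3, while point_size = 0.0f is
    * clamped up to 1, the smallest representable width (0.125 pixel).
    */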
  1725.  
  1726.    STATIC_ASSERT(Elements(sf->payload) >= 6);
  1727.    sf->payload[0] = dw1;
  1728.    sf->payload[1] = dw2;
  1729.    sf->payload[2] = dw3;
  1730.    sf->payload[3] = fui(offset_const);
  1731.    sf->payload[4] = fui(offset_scale);
  1732.    sf->payload[5] = fui(offset_clamp);
  1733.  
  1734.    if (state->multisample) {
  1735.       sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
  1736.  
  1737.       /*
  1738.        * From the Sandy Bridge PRM, volume 2 part 1, page 251:
  1739.        *
  1740.        *     "Software must not program a value of 0.0 when running in
  1741.        *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
  1742.        *      when multisampling rasterization is enabled."
  1743.        */
  1744.       if (!line_width) {
  1745.          line_width = 128; /* 1.0f */
  1746.  
  1747.          sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
  1748.       }
  1749.    }
  1750.    else {
  1751.       sf->dw_msaa = 0;
  1752.    }
  1753. }
  1754.  
  1755. /**
  1756.  * Fill in DW2 to DW7 of 3DSTATE_SF.
  1757.  */
  1758. void
  1759. ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
  1760.                                     const struct ilo_rasterizer_state *rasterizer,
  1761.                                     int num_samples,
  1762.                                     enum pipe_format depth_format,
  1763.                                     uint32_t *payload, unsigned payload_len)
  1764. {
  1765.    const struct ilo_rasterizer_sf *sf = (rasterizer) ? &rasterizer->sf : NULL;
  1766.  
  1767.    assert(payload_len == Elements(sf->payload));
  1768.  
  1769.    if (sf) {
  1770.       memcpy(payload, sf->payload, sizeof(sf->payload));
  1771.  
  1772.       if (num_samples > 1)
  1773.          payload[1] |= sf->dw_msaa;
  1774.  
  1775.       if (dev->gen >= ILO_GEN(7)) {
  1776.          int format;
  1777.  
  1778.          /* separate stencil */
  1779.          switch (depth_format) {
  1780.          case PIPE_FORMAT_Z24_UNORM_S8_UINT:
  1781.             depth_format = PIPE_FORMAT_Z24X8_UNORM;
  1782.             break;
  1783.          case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
  1784.             depth_format = PIPE_FORMAT_Z32_FLOAT;
  1785.             break;
  1786.          case PIPE_FORMAT_S8_UINT:
  1787.             depth_format = PIPE_FORMAT_NONE;
  1788.             break;
  1789.          default:
  1790.             break;
  1791.          }
  1792.  
  1793.          format = gen6_translate_depth_format(depth_format);
  1794.          /* FLOAT surface is assumed when there is no depth buffer */
  1795.          if (format < 0)
  1796.             format = BRW_DEPTHFORMAT_D32_FLOAT;
  1797.  
  1798.          payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
  1799.       }
  1800.    }
  1801.    else {
  1802.       payload[0] = 0;
  1803.       payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
  1804.       payload[2] = 0;
  1805.       payload[3] = 0;
  1806.       payload[4] = 0;
  1807.       payload[5] = 0;
  1808.    }
  1809. }
  1810.  
  1811. /**
  1812.  * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
  1813.  */
  1814. void
  1815. ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
  1816.                                  const struct ilo_rasterizer_state *rasterizer,
  1817.                                  const struct ilo_shader_state *fs,
  1818.                                  const struct ilo_shader_state *last_sh,
  1819.                                  uint32_t *dw, int num_dwords)
  1820. {
  1821.    int output_count, vue_offset, vue_len;
  1822.    const struct ilo_kernel_routing *routing;
  1823.  
  1824.    ILO_GPE_VALID_GEN(dev, 6, 7);
  1825.    assert(num_dwords == 13);
  1826.  
  1827.    if (!fs) {
  1828.       memset(dw, 0, sizeof(dw[0]) * num_dwords);
  1829.  
  1830.       if (dev->gen >= ILO_GEN(7))
  1831.          dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
  1832.       else
  1833.          dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
  1834.  
  1835.       return;
  1836.    }
  1837.  
  1838.    output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
  1839.    assert(output_count <= 32);
  1840.  
  1841.    routing = ilo_shader_get_kernel_routing(fs);
  1842.  
  1843.    vue_offset = routing->source_skip;
  1844.    assert(vue_offset % 2 == 0);
  1845.    vue_offset /= 2;
  1846.  
  1847.    vue_len = (routing->source_len + 1) / 2;
  1848.    if (!vue_len)
  1849.       vue_len = 1;
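   /*
    * Both values are presumably in 256-bit URB units, i.e. pairs of 4-dword
    * attributes.  Example for illustration only: source_skip = 2 and
    * source_len = 5 give vue_offset = 1 and vue_len = 3.
    */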
  1850.  
  1851.    if (dev->gen >= ILO_GEN(7)) {
  1852.       dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
  1853.               vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
  1854.               vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
  1855.       if (routing->swizzle_enable)
  1856.          dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
  1857.    }
  1858.    else {
  1859.       dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
  1860.               vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
  1861.               vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
  1862.       if (routing->swizzle_enable)
  1863.          dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
  1864.    }
  1865.  
  1866.    switch (rasterizer->state.sprite_coord_mode) {
  1867.    case PIPE_SPRITE_COORD_UPPER_LEFT:
  1868.       dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
  1869.       break;
  1870.    case PIPE_SPRITE_COORD_LOWER_LEFT:
  1871.       dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
  1872.       break;
  1873.    }
  1874.  
  1875.    STATIC_ASSERT(Elements(routing->swizzles) >= 16);
  1876.    memcpy(&dw[1], routing->swizzles, 2 * 16);
  1877.  
  1878.    /*
  1879.     * From the Ivy Bridge PRM, volume 2 part 1, page 268:
  1880.     *
  1881.     *     "This field (Point Sprite Texture Coordinate Enable) must be
  1882.     *      programmed to 0 when non-point primitives are rendered."
  1883.     *
  1884.     * TODO We do not check that yet.
  1885.     */
  1886.    dw[9] = routing->point_sprite_enable;
  1887.  
  1888.    dw[10] = routing->const_interp_enable;
  1889.  
  1890.    /* WrapShortest enables */
  1891.    dw[11] = 0;
  1892.    dw[12] = 0;
  1893. }
  1894.  
  1895. static void
  1896. gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
  1897.                      const struct ilo_rasterizer_state *rasterizer,
  1898.                      const struct ilo_shader_state *fs,
  1899.                      const struct ilo_shader_state *last_sh,
  1900.                      struct ilo_cp *cp)
  1901. {
  1902.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
  1903.    const uint8_t cmd_len = 20;
  1904.    uint32_t payload_raster[6], payload_sbe[13];
  1905.  
  1906.    ILO_GPE_VALID_GEN(dev, 6, 6);
  1907.  
  1908.    ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
  1909.          1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
  1910.    ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
  1911.          fs, last_sh, payload_sbe, Elements(payload_sbe));
  1912.  
  1913.    ilo_cp_begin(cp, cmd_len);
  1914.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1915.    ilo_cp_write(cp, payload_sbe[0]);
  1916.    ilo_cp_write_multi(cp, payload_raster, 6);
  1917.    ilo_cp_write_multi(cp, &payload_sbe[1], 12);
  1918.    ilo_cp_end(cp);
  1919. }
  1920.  
  1921. void
  1922. ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
  1923.                                 const struct pipe_rasterizer_state *state,
  1924.                                 struct ilo_rasterizer_wm *wm)
  1925. {
  1926.    uint32_t dw5, dw6;
  1927.  
  1928.    ILO_GPE_VALID_GEN(dev, 6, 6);
  1929.  
  1930.    /* only the FF unit states are set, as in GEN7 */
  1931.  
  1932.    dw5 = GEN6_WM_LINE_AA_WIDTH_2_0;
  1933.  
  1934.    /* same value as in 3DSTATE_SF */
  1935.    if (state->line_smooth)
  1936.       dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
  1937.  
  1938.    if (state->poly_stipple_enable)
  1939.       dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
  1940.    if (state->line_stipple_enable)
  1941.       dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
  1942.  
  1943.    dw6 = GEN6_WM_POSITION_ZW_PIXEL |
  1944.          GEN6_WM_MSRAST_OFF_PIXEL |
  1945.          GEN6_WM_MSDISPMODE_PERSAMPLE;
  1946.  
  1947.    if (state->bottom_edge_rule)
  1948.       dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
  1949.  
  1950.    /*
  1951.     * assertion that makes sure
  1952.     *
  1953.     *   dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
  1954.     *
  1955.     * is valid
  1956.     */
  1957.    STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 &&
  1958.                  GEN6_WM_MSDISPMODE_PERSAMPLE == 0);
  1959.  
  1960.    wm->dw_msaa_rast =
  1961.       (state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0;
  1962.    wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL;
  1963.  
  1964.    STATIC_ASSERT(Elements(wm->payload) >= 2);
  1965.    wm->payload[0] = dw5;
  1966.    wm->payload[1] = dw6;
  1967. }
  1968.  
  1969. void
  1970. ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
  1971.                          const struct ilo_shader_state *fs,
  1972.                          struct ilo_shader_cso *cso)
  1973. {
  1974.    int start_grf, input_count, interps, max_threads;
  1975.    uint32_t dw2, dw4, dw5, dw6;
  1976.  
  1977.    ILO_GPE_VALID_GEN(dev, 6, 6);
  1978.  
  1979.    start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
  1980.    input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
  1981.    interps = ilo_shader_get_kernel_param(fs,
  1982.          ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
  1983.  
  1984.    /* see brwCreateContext() */
  1985.    max_threads = (dev->gt == 2) ? 80 : 40;
  1986.  
  1987.    dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
  1988.  
  1989.    dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
  1990.          0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
  1991.          0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
  1992.  
  1993.    dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
  1994.  
  1995.    /*
  1996.     * From the Sandy Bridge PRM, volume 2 part 1, page 275:
  1997.     *
  1998.     *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
  1999.     *      PS kernel or color calculator has the ability to kill (discard)
  2000.     *      pixels or samples, other than due to depth or stencil testing.
  2001.     *      This bit is required to be ENABLED in the following situations:
  2002.     *
  2003.     *      The API pixel shader program contains "killpix" or "discard"
  2004.     *      instructions, or other code in the pixel shader kernel that can
  2005.     *      cause the final pixel mask to differ from the pixel mask received
  2006.     *      on dispatch.
  2007.     *
  2008.     *      A sampler with chroma key enabled with kill pixel mode is used by
  2009.     *      the pixel shader.
  2010.     *
  2011.     *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
  2012.     *      enabled.
  2013.     *
  2014.     *      The pixel shader kernel generates and outputs oMask.
  2015.     *
  2016.     *      Note: As ClipDistance clipping is fully supported in hardware and
  2017.     *      therefore not via PS instructions, there should be no need to
  2018.     *      ENABLE this bit due to ClipDistance clipping."
  2019.     */
  2020.    if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
  2021.       dw5 |= GEN6_WM_KILL_ENABLE;
  2022.  
  2023.    /*
  2024.     * From the Sandy Bridge PRM, volume 2 part 1, page 275:
  2025.     *
  2026.     *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
  2027.     *      field must be set to disabled."
  2028.     *
  2029.     * TODO This is not checked yet.
  2030.     */
  2031.    if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
  2032.       dw5 |= GEN6_WM_COMPUTED_DEPTH;
  2033.  
  2034.    if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
  2035.       dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
  2036.  
  2037.    if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
  2038.       dw5 |= GEN6_WM_USES_SOURCE_W;
  2039.  
  2040.    /*
  2041.     * TODO set this bit only when
  2042.     *
  2043.     *  a) fs writes colors and color is not masked, or
  2044.     *  b) fs writes depth, or
  2045.     *  c) fs or cc kills
  2046.     */
  2047.    if (true)
  2048.       dw5 |= GEN6_WM_DISPATCH_ENABLE;
  2049.  
  2050.    assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
  2051.    dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
  2052.  
  2053.    dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
  2054.          GEN6_WM_POSOFFSET_NONE |
  2055.          interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
  2056.  
  2057.    STATIC_ASSERT(Elements(cso->payload) >= 4);
  2058.    cso->payload[0] = dw2;
  2059.    cso->payload[1] = dw4;
  2060.    cso->payload[2] = dw5;
  2061.    cso->payload[3] = dw6;
  2062. }
  2063.  
  2064. static void
  2065. gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
  2066.                      const struct ilo_shader_state *fs,
  2067.                      int num_samplers,
  2068.                      const struct ilo_rasterizer_state *rasterizer,
  2069.                      bool dual_blend, bool cc_may_kill,
  2070.                      struct ilo_cp *cp)
  2071. {
  2072.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
  2073.    const uint8_t cmd_len = 9;
  2074.    const int num_samples = 1;
  2075.    const struct ilo_shader_cso *fs_cso;
  2076.    uint32_t dw2, dw4, dw5, dw6;
  2077.  
  2078.    ILO_GPE_VALID_GEN(dev, 6, 6);
  2079.  
  2080.    if (!fs) {
  2081.       /* see brwCreateContext() */
  2082.       const int max_threads = (dev->gt == 2) ? 80 : 40;
  2083.  
  2084.       ilo_cp_begin(cp, cmd_len);
  2085.       ilo_cp_write(cp, cmd | (cmd_len - 2));
  2086.       ilo_cp_write(cp, 0);
  2087.       ilo_cp_write(cp, 0);
  2088.       ilo_cp_write(cp, 0);
  2089.       ilo_cp_write(cp, 0);
  2090.       /* honor the valid range even if dispatching is disabled */
  2091.       ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
  2092.       ilo_cp_write(cp, 0);
  2093.       ilo_cp_write(cp, 0);
  2094.       ilo_cp_write(cp, 0);
  2095.       ilo_cp_end(cp);
  2096.  
  2097.       return;
  2098.    }
  2099.  
  2100.    fs_cso = ilo_shader_get_kernel_cso(fs);
  2101.    dw2 = fs_cso->payload[0];
  2102.    dw4 = fs_cso->payload[1];
  2103.    dw5 = fs_cso->payload[2];
  2104.    dw6 = fs_cso->payload[3];
  2105.  
  2106.    dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
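   /* the count field is in multiples of four samplers; e.g. 5 samplers
    * above become (5 + 3) / 4 = 2 (illustration, not an original comment)
    */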
  2107.  
  2108.    if (true) {
  2109.       dw4 |= GEN6_WM_STATISTICS_ENABLE;
  2110.    }
  2111.    else {
  2112.       /*
  2113.        * From the Sandy Bridge PRM, volume 2 part 1, page 248:
  2114.        *
  2115.        *     "This bit (Statistics Enable) must be disabled if either of these
  2116.        *      bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
  2117.        *      Resolve Enable or Depth Buffer Resolve Enable."
  2118.        */
  2119.       dw4 |= GEN6_WM_DEPTH_CLEAR;
  2120.       dw4 |= GEN6_WM_DEPTH_RESOLVE;
  2121.       dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
  2122.    }
  2123.  
  2124.    if (cc_may_kill) {
  2125.       dw5 |= GEN6_WM_KILL_ENABLE |
  2126.              GEN6_WM_DISPATCH_ENABLE;
  2127.    }
  2128.  
  2129.    if (dual_blend)
  2130.       dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
  2131.  
  2132.    dw5 |= rasterizer->wm.payload[0];
  2133.  
  2134.    dw6 |= rasterizer->wm.payload[1];
  2135.  
  2136.    if (num_samples > 1) {
  2137.       dw6 |= rasterizer->wm.dw_msaa_rast |
  2138.              rasterizer->wm.dw_msaa_disp;
  2139.    }
  2140.  
  2141.    ilo_cp_begin(cp, cmd_len);
  2142.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2143.    ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
  2144.    ilo_cp_write(cp, dw2);
  2145.    ilo_cp_write(cp, 0); /* scratch */
  2146.    ilo_cp_write(cp, dw4);
  2147.    ilo_cp_write(cp, dw5);
  2148.    ilo_cp_write(cp, dw6);
  2149.    ilo_cp_write(cp, 0); /* kernel 1 */
  2150.    ilo_cp_write(cp, 0); /* kernel 2 */
  2151.    ilo_cp_end(cp);
  2152. }
  2153.  
  2154. static unsigned
  2155. gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
  2156.                            const uint32_t *bufs, const int *sizes,
  2157.                            int num_bufs, int max_read_length,
  2158.                            uint32_t *dw, int num_dwords)
  2159. {
  2160.    unsigned enabled = 0x0;
  2161.    int total_read_length, i;
  2162.  
  2163.    assert(num_dwords == 4);
  2164.  
  2165.    total_read_length = 0;
  2166.    for (i = 0; i < 4; i++) {
  2167.       if (i < num_bufs && sizes[i]) {
  2168.          /* in 256-bit units minus one */
  2169.          const int read_len = (sizes[i] + 31) / 32 - 1;
  2170.  
  2171.          assert(bufs[i] % 32 == 0);
  2172.          assert(read_len < 32);
  2173.  
  2174.          enabled |= 1 << i;
  2175.          dw[i] = bufs[i] | read_len;
  2176.  
  2177.          total_read_length += read_len + 1;
  2178.       }
  2179.       else {
  2180.          dw[i] = 0;
  2181.       }
  2182.    }
  2183.  
  2184.    assert(total_read_length <= max_read_length);
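   /*
    * Worked example (illustration only, sizes presumably in bytes): a single
    * buffer with sizes[0] = 96 gives read_len = (96 + 31) / 32 - 1 = 2, so
    * dw[0] = bufs[0] | 2, enabled = 0x1, and total_read_length = 3 (three
    * 256-bit units), which is checked against max_read_length.
    */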
  2185.  
  2186.    return enabled;
  2187. }
  2188.  
  2189. static void
  2190. gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
  2191.                               const uint32_t *bufs, const int *sizes,
  2192.                               int num_bufs,
  2193.                               struct ilo_cp *cp)
  2194. {
  2195.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
  2196.    const uint8_t cmd_len = 5;
  2197.    uint32_t buf_dw[4], buf_enabled;
  2198.  
  2199.    ILO_GPE_VALID_GEN(dev, 6, 6);
  2200.    assert(num_bufs <= 4);
  2201.  
  2202.    /*
  2203.     * From the Sandy Bridge PRM, volume 2 part 1, page 138:
  2204.     *
  2205.     *     "The sum of all four read length fields (each incremented to
  2206.     *      represent the actual read length) must be less than or equal to 32"
  2207.     */
  2208.    buf_enabled = gen6_fill_3dstate_constant(dev,
  2209.          bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
  2210.  
  2211.    ilo_cp_begin(cp, cmd_len);
  2212.    ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
  2213.    ilo_cp_write(cp, buf_dw[0]);
  2214.    ilo_cp_write(cp, buf_dw[1]);
  2215.    ilo_cp_write(cp, buf_dw[2]);
  2216.    ilo_cp_write(cp, buf_dw[3]);
  2217.    ilo_cp_end(cp);
  2218. }
  2219.  
  2220. static void
  2221. gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
  2222.                               const uint32_t *bufs, const int *sizes,
  2223.                               int num_bufs,
  2224.                               struct ilo_cp *cp)
  2225. {
  2226.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
  2227.    const uint8_t cmd_len = 5;
  2228.    uint32_t buf_dw[4], buf_enabled;
  2229.  
  2230.    ILO_GPE_VALID_GEN(dev, 6, 6);
  2231.    assert(num_bufs <= 4);
  2232.  
  2233.    /*
  2234.     * From the Sandy Bridge PRM, volume 2 part 1, page 161:
  2235.     *
  2236.     *     "The sum of all four read length fields (each incremented to
  2237.     *      represent the actual read length) must be less than or equal to 64"
  2238.     */
  2239.    buf_enabled = gen6_fill_3dstate_constant(dev,
  2240.          bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
  2241.  
  2242.    ilo_cp_begin(cp, cmd_len);
  2243.    ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
  2244.    ilo_cp_write(cp, buf_dw[0]);
  2245.    ilo_cp_write(cp, buf_dw[1]);
  2246.    ilo_cp_write(cp, buf_dw[2]);
  2247.    ilo_cp_write(cp, buf_dw[3]);
  2248.    ilo_cp_end(cp);
  2249. }
  2250.  
  2251. static void
  2252. gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
  2253.                               const uint32_t *bufs, const int *sizes,
  2254.                               int num_bufs,
  2255.                               struct ilo_cp *cp)
  2256. {
  2257.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
  2258.    const uint8_t cmd_len = 5;
  2259.    uint32_t buf_dw[4], buf_enabled;
  2260.  
  2261.    ILO_GPE_VALID_GEN(dev, 6, 6);
  2262.    assert(num_bufs <= 4);
  2263.  
  2264.    /*
  2265.     * From the Sandy Bridge PRM, volume 2 part 1, page 287:
  2266.     *
  2267.     *     "The sum of all four read length fields (each incremented to
  2268.     *      represent the actual read length) must be less than or equal to 64"
  2269.     */
  2270.    buf_enabled = gen6_fill_3dstate_constant(dev,
  2271.          bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
  2272.  
  2273.    ilo_cp_begin(cp, cmd_len);
  2274.    ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
  2275.    ilo_cp_write(cp, buf_dw[0]);
  2276.    ilo_cp_write(cp, buf_dw[1]);
  2277.    ilo_cp_write(cp, buf_dw[2]);
  2278.    ilo_cp_write(cp, buf_dw[3]);
  2279.    ilo_cp_end(cp);
  2280. }
  2281.  
  2282. static void
  2283. gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
  2284.                               unsigned sample_mask,
  2285.                               struct ilo_cp *cp)
  2286. {
  2287.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
  2288.    const uint8_t cmd_len = 2;
  2289.    const unsigned valid_mask = 0xf;
  2290.  
  2291.    ILO_GPE_VALID_GEN(dev, 6, 6);
  2292.  
  2293.    sample_mask &= valid_mask;
  2294.  
  2295.    ilo_cp_begin(cp, cmd_len);
  2296.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2297.    ilo_cp_write(cp, sample_mask);
  2298.    ilo_cp_end(cp);
  2299. }
  2300.  
  2301. static void
  2302. gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
  2303.                                     unsigned x, unsigned y,
  2304.                                     unsigned width, unsigned height,
  2305.                                     struct ilo_cp *cp)
  2306. {
  2307.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
  2308.    const uint8_t cmd_len = 4;
  2309.    unsigned xmax = x + width - 1;
  2310.    unsigned ymax = y + height - 1;
  2311.    int rect_limit;
  2312.  
  2313.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2314.  
  2315.    if (dev->gen >= ILO_GEN(7)) {
  2316.       rect_limit = 16383;
  2317.    }
  2318.    else {
  2319.       /*
  2320.        * From the Sandy Bridge PRM, volume 2 part 1, page 230:
  2321.        *
  2322.        *     "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
  2323.        *      must be an even number"
  2324.        */
  2325.       assert(y % 2 == 0);
  2326.  
  2327.       rect_limit = 8191;
  2328.    }
  2329.  
  2330.    if (x > rect_limit) x = rect_limit;
  2331.    if (y > rect_limit) y = rect_limit;
  2332.    if (xmax > rect_limit) xmax = rect_limit;
  2333.    if (ymax > rect_limit) ymax = rect_limit;
  2334.  
  2335.    ilo_cp_begin(cp, cmd_len);
  2336.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2337.    ilo_cp_write(cp, y << 16 | x);
  2338.    ilo_cp_write(cp, ymax << 16 | xmax);
  2339.  
  2340.    /*
  2341.     * There is no need to set the origin; that field is intended to support
  2342.     * front-buffer rendering.
  2343.     */
  2344.    ilo_cp_write(cp, 0);
  2345.  
  2346.    ilo_cp_end(cp);
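   /*
    * Usage sketch (hypothetical values): a full 1920x1080 render target on
    * GEN6 would be emitted as (x, y) = (0, 0) and (xmax, ymax) =
    * (1919, 1079), well within the 8191 limit, and y = 0 satisfies the
    * even-Y errata quoted above.
    */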
  2347. }
  2348.  
  2349. struct ilo_zs_surface_info {
  2350.    int surface_type;
  2351.    int format;
  2352.  
  2353.    struct {
  2354.       struct intel_bo *bo;
  2355.       unsigned stride;
  2356.       enum intel_tiling_mode tiling;
  2357.       uint32_t offset;
  2358.    } zs, stencil, hiz;
  2359.  
  2360.    unsigned width, height, depth;
  2361.    unsigned lod, first_layer, num_layers;
  2362.    uint32_t x_offset, y_offset;
  2363. };
  2364.  
  2365. static void
  2366. zs_init_info_null(const struct ilo_dev_info *dev,
  2367.                   struct ilo_zs_surface_info *info)
  2368. {
  2369.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2370.  
  2371.    memset(info, 0, sizeof(*info));
  2372.  
  2373.    info->surface_type = BRW_SURFACE_NULL;
  2374.    info->format = BRW_DEPTHFORMAT_D32_FLOAT;
  2375.    info->width = 1;
  2376.    info->height = 1;
  2377.    info->depth = 1;
  2378.    info->num_layers = 1;
  2379. }
  2380.  
  2381. static void
  2382. zs_init_info(const struct ilo_dev_info *dev,
  2383.              const struct ilo_texture *tex,
  2384.              enum pipe_format format,
  2385.              unsigned level,
  2386.              unsigned first_layer, unsigned num_layers,
  2387.              struct ilo_zs_surface_info *info)
  2388. {
  2389.    const bool rebase_layer = true;
  2390.    struct intel_bo * const hiz_bo = NULL;
  2391.    bool separate_stencil;
  2392.    uint32_t x_offset[3], y_offset[3];
  2393.  
  2394.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2395.  
  2396.    memset(info, 0, sizeof(*info));
  2397.  
  2398.    info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
  2399.  
  2400.    if (info->surface_type == BRW_SURFACE_CUBE) {
  2401.       /*
  2402.        * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
  2403.        *
  2404.        *     "For Other Surfaces (Cube Surfaces):
  2405.        *      This field (Minimum Array Element) is ignored."
  2406.        *
  2407.        *     "For Other Surfaces (Cube Surfaces):
  2408.        *      This field (Render Target View Extent) is ignored."
  2409.        *
  2410.        * As such, we cannot set first_layer and num_layers on cube surfaces.
  2411.        * To work around that, treat it as a 2D surface.
  2412.        */
  2413.       info->surface_type = BRW_SURFACE_2D;
  2414.    }
  2415.  
  2416.    if (dev->gen >= ILO_GEN(7)) {
  2417.       separate_stencil = true;
  2418.    }
  2419.    else {
  2420.       /*
  2421.        * From the Sandy Bridge PRM, volume 2 part 1, page 317:
  2422.        *
  2423.        *     "This field (Separate Stencil Buffer Enable) must be set to the
  2424.        *      same value (enabled or disabled) as Hierarchical Depth Buffer
  2425.        *      Enable."
  2426.        */
  2427.       separate_stencil = (hiz_bo != NULL);
  2428.    }
  2429.  
  2430.    /*
  2431.     * From the Sandy Bridge PRM, volume 2 part 1, page 317:
  2432.     *
  2433.     *     "If this field (Hierarchical Depth Buffer Enable) is enabled, the
  2434.     *      Surface Format of the depth buffer cannot be
  2435.     *      D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
  2436.     *      requires the separate stencil buffer."
  2437.     *
  2438.     * From the Ironlake PRM, volume 2 part 1, page 330:
  2439.     *
  2440.     *     "If this field (Separate Stencil Buffer Enable) is disabled, the
  2441.     *      Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
  2442.     *
  2443.     * There is no similar restriction for GEN6.  But when D24_UNORM_X8_UINT
  2444.     * is indeed used, the depth values output by the fragment shaders will
  2445.     * be different when read back.
  2446.     *
  2447.     * As for GEN7+, separate_stencil is always true.
  2448.     */
  2449.    switch (format) {
  2450.    case PIPE_FORMAT_Z16_UNORM:
  2451.       info->format = BRW_DEPTHFORMAT_D16_UNORM;
  2452.       break;
  2453.    case PIPE_FORMAT_Z32_FLOAT:
  2454.       info->format = BRW_DEPTHFORMAT_D32_FLOAT;
  2455.       break;
  2456.    case PIPE_FORMAT_Z24X8_UNORM:
  2457.    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
  2458.       info->format = (separate_stencil) ?
  2459.          BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
  2460.          BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
  2461.       break;
  2462.    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
  2463.       info->format = (separate_stencil) ?
  2464.          BRW_DEPTHFORMAT_D32_FLOAT :
  2465.          BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
  2466.       break;
  2467.    case PIPE_FORMAT_S8_UINT:
  2468.       if (separate_stencil) {
  2469.          info->format = BRW_DEPTHFORMAT_D32_FLOAT;
  2470.          break;
  2471.       }
  2472.       /* fall through */
  2473.    default:
  2474.       assert(!"unsupported depth/stencil format");
  2475.       zs_init_info_null(dev, info);
  2476.       return;
  2477.       break;
  2478.    }
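   /*
    * Example for illustration: a PIPE_FORMAT_Z24_UNORM_S8_UINT texture on
    * GEN7 has separate_stencil set, so the depth buffer is programmed as
    * BRW_DEPTHFORMAT_D24_UNORM_X8_UINT here and the stencil data is taken
    * from the texture's separate_s8 resource below (assuming the resource
    * was allocated with one).
    */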
  2479.  
  2480.    if (format != PIPE_FORMAT_S8_UINT) {
  2481.       info->zs.bo = tex->bo;
  2482.       info->zs.stride = tex->bo_stride;
  2483.       info->zs.tiling = tex->tiling;
  2484.  
  2485.       if (rebase_layer) {
  2486.          info->zs.offset = ilo_texture_get_slice_offset(tex,
  2487.                level, first_layer, &x_offset[0], &y_offset[0]);
  2488.       }
  2489.    }
  2490.  
  2491.    if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
  2492.       const struct ilo_texture *s8_tex =
  2493.          (tex->separate_s8) ? tex->separate_s8 : tex;
  2494.  
  2495.       info->stencil.bo = s8_tex->bo;
  2496.  
  2497.       /*
  2498.        * From the Sandy Bridge PRM, volume 2 part 1, page 329:
  2499.        *
  2500.        *     "The pitch must be set to 2x the value computed based on width,
  2501.        *       as the stencil buffer is stored with two rows interleaved."
  2502.        *
  2503.        * According to the classic driver, we need to do the same for GEN7+
  2504.        * even though the Ivy Bridge PRM does not say anything about it.
  2505.        */
  2506.       info->stencil.stride = s8_tex->bo_stride * 2;
  2507.  
  2508.       info->stencil.tiling = s8_tex->tiling;
  2509.  
  2510.       if (rebase_layer) {
  2511.          info->stencil.offset = ilo_texture_get_slice_offset(s8_tex,
  2512.                level, first_layer, &x_offset[1], &y_offset[1]);
  2513.       }
  2514.    }
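   /*
    * Worked example (hypothetical bo_stride): an S8 miptree with
    * bo_stride = 128 is programmed with stencil.stride = 256 here, and
    * ilo_gpe_init_zs_surface() later stores stride - 1 = 255 in payload[6].
    */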
  2515.  
  2516.    if (hiz_bo) {
  2517.       info->hiz.bo = hiz_bo;
  2518.       info->hiz.stride = 0;
  2519.       info->hiz.tiling = 0;
  2520.       info->hiz.offset = 0;
  2521.       x_offset[2] = 0;
  2522.       y_offset[2] = 0;
  2523.    }
  2524.  
  2525.    info->width = tex->base.width0;
  2526.    info->height = tex->base.height0;
  2527.    info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
  2528.       tex->base.depth0 : num_layers;
  2529.  
  2530.    info->lod = level;
  2531.    info->first_layer = first_layer;
  2532.    info->num_layers = num_layers;
  2533.  
  2534.    if (rebase_layer) {
  2535.       /* the size of the layer */
  2536.       info->width = u_minify(info->width, level);
  2537.       info->height = u_minify(info->height, level);
  2538.       if (info->surface_type == BRW_SURFACE_3D)
  2539.          info->depth = u_minify(info->depth, level);
  2540.       else
  2541.          info->depth = 1;
  2542.  
  2543.       /* no layered rendering */
  2544.       assert(num_layers == 1);
  2545.  
  2546.       info->lod = 0;
  2547.       info->first_layer = 0;
  2548.       info->num_layers = 1;
  2549.  
  2550.       /* all three share the same X/Y offsets */
  2551.       if (info->zs.bo) {
  2552.          if (info->stencil.bo) {
  2553.             assert(x_offset[0] == x_offset[1]);
  2554.             assert(y_offset[0] == y_offset[1]);
  2555.          }
  2556.  
  2557.          info->x_offset = x_offset[0];
  2558.          info->y_offset = y_offset[0];
  2559.       }
  2560.       else {
  2561.          assert(info->stencil.bo);
  2562.  
  2563.          info->x_offset = x_offset[1];
  2564.          info->y_offset = y_offset[1];
  2565.       }
  2566.  
  2567.       if (info->hiz.bo) {
  2568.          assert(info->x_offset == x_offset[2]);
  2569.          assert(info->y_offset == y_offset[2]);
  2570.       }
  2571.  
  2572.       /*
  2573.        * From the Sandy Bridge PRM, volume 2 part 1, page 326:
  2574.        *
  2575.        *     "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
  2576.        *      Coordinate Offset X) must be zero to ensure correct alignment"
  2577.        *
  2578.        * XXX The check is skipped for gen6, which seems to be fine, but we
  2579.        * should eventually verify that misaligned offsets never occur there.
  2580.        */
  2581.       if (dev->gen >= ILO_GEN(7)) {
  2582.          assert((info->x_offset & 7) == 0 && (info->y_offset & 7) == 0);
  2583.          info->x_offset &= ~7;
  2584.          info->y_offset &= ~7;
  2585.       }
  2586.  
  2587.       info->width += info->x_offset;
  2588.       info->height += info->y_offset;
  2589.  
  2590.       /* we have to treat them as 2D surfaces */
  2591.       if (info->surface_type == BRW_SURFACE_CUBE) {
  2592.          assert(tex->base.width0 == tex->base.height0);
  2593.          /* we will set slice_offset to point to the single face */
  2594.          info->surface_type = BRW_SURFACE_2D;
  2595.       }
  2596.       else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) {
  2597.          assert(tex->base.height0 == 1);
  2598.          info->surface_type = BRW_SURFACE_2D;
  2599.       }
  2600.    }
  2601. }
  2602.  
  2603. void
  2604. ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
  2605.                         const struct ilo_texture *tex,
  2606.                         enum pipe_format format,
  2607.                         unsigned level,
  2608.                         unsigned first_layer, unsigned num_layers,
  2609.                         struct ilo_zs_surface *zs)
  2610. {
  2611.    const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
  2612.    const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
  2613.    struct ilo_zs_surface_info info;
  2614.    uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
  2615.  
  2616.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2617.  
  2618.    if (tex)
  2619.       zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
  2620.    else
  2621.       zs_init_info_null(dev, &info);
  2622.  
  2623.    switch (info.surface_type) {
  2624.    case BRW_SURFACE_NULL:
  2625.       break;
  2626.    case BRW_SURFACE_1D:
  2627.       assert(info.width <= max_2d_size && info.height == 1 &&
  2628.              info.depth <= max_array_size);
  2629.       assert(info.first_layer < max_array_size - 1 &&
  2630.              info.num_layers <= max_array_size);
  2631.       break;
  2632.    case BRW_SURFACE_2D:
  2633.       assert(info.width <= max_2d_size && info.height <= max_2d_size &&
  2634.              info.depth <= max_array_size);
  2635.       assert(info.first_layer < max_array_size - 1 &&
  2636.              info.num_layers <= max_array_size);
  2637.       break;
  2638.    case BRW_SURFACE_3D:
  2639.       assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
  2640.       assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
  2641.       assert(info.x_offset == 0 && info.y_offset == 0);
  2642.       break;
  2643.    case BRW_SURFACE_CUBE:
  2644.       assert(info.width <= max_2d_size && info.height <= max_2d_size &&
  2645.              info.depth == 1);
  2646.       assert(info.first_layer == 0 && info.num_layers == 1);
  2647.       assert(info.width == info.height);
  2648.       assert(info.x_offset == 0 && info.y_offset == 0);
  2649.       break;
  2650.    default:
  2651.       assert(!"unexpected depth surface type");
  2652.       break;
  2653.    }
  2654.  
  2655.    dw1 = info.surface_type << 29 |
  2656.          info.format << 18;
  2657.  
  2658.    if (info.zs.bo) {
  2659.       /* required for GEN6+ */
  2660.       assert(info.zs.tiling == INTEL_TILING_Y);
  2661.       assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
  2662.             info.zs.stride % 128 == 0);
  2663.       assert(info.width <= info.zs.stride);
  2664.  
  2665.       dw1 |= (info.zs.stride - 1);
  2666.       dw2 = info.zs.offset;
  2667.    }
  2668.    else {
  2669.       dw2 = 0;
  2670.    }
  2671.  
  2672.    if (dev->gen >= ILO_GEN(7)) {
  2673.       if (info.zs.bo)
  2674.          dw1 |= 1 << 28;
  2675.  
  2676.       if (info.stencil.bo)
  2677.          dw1 |= 1 << 27;
  2678.  
  2679.       if (info.hiz.bo)
  2680.          dw1 |= 1 << 22;
  2681.  
  2682.       dw3 = (info.height - 1) << 18 |
  2683.             (info.width - 1) << 4 |
  2684.             info.lod;
  2685.  
  2686.       dw4 = (info.depth - 1) << 21 |
  2687.             info.first_layer << 10;
  2688.  
  2689.       dw5 = info.y_offset << 16 | info.x_offset;
  2690.  
  2691.       dw6 = (info.num_layers - 1) << 21;
  2692.    }
  2693.    else {
  2694.       /* always Y-tiled */
  2695.       dw1 |= 1 << 27 |
  2696.              1 << 26;
  2697.  
  2698.       if (info.hiz.bo) {
  2699.          dw1 |= 1 << 22 |
  2700.                 1 << 21;
  2701.       }
  2702.  
  2703.       dw3 = (info.height - 1) << 19 |
  2704.             (info.width - 1) << 6 |
  2705.             info.lod << 2 |
  2706.             BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
  2707.  
  2708.       dw4 = (info.depth - 1) << 21 |
  2709.             info.first_layer << 10 |
  2710.             (info.num_layers - 1) << 1;
  2711.  
  2712.       dw5 = info.y_offset << 16 | info.x_offset;
  2713.  
  2714.       dw6 = 0;
  2715.    }
  2716.  
  2717.    STATIC_ASSERT(Elements(zs->payload) >= 10);
  2718.  
  2719.    zs->payload[0] = dw1;
  2720.    zs->payload[1] = dw2;
  2721.    zs->payload[2] = dw3;
  2722.    zs->payload[3] = dw4;
  2723.    zs->payload[4] = dw5;
  2724.    zs->payload[5] = dw6;
  2725.  
  2726.    /* do not increment reference count */
  2727.    zs->bo = info.zs.bo;
  2728.  
  2729.    /* separate stencil */
  2730.    if (info.stencil.bo) {
  2731.       assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
  2732.              info.stencil.stride % 128 == 0);
  2733.  
  2734.       zs->payload[6] = info.stencil.stride - 1;
  2735.       zs->payload[7] = info.stencil.offset;
  2736.  
  2737.       /* do not increment reference count */
  2738.       zs->separate_s8_bo = info.stencil.bo;
  2739.    }
  2740.    else {
  2741.       zs->payload[6] = 0;
  2742.       zs->payload[7] = 0;
  2743.       zs->separate_s8_bo = NULL;
  2744.    }
  2745.  
  2746.    /* hiz */
  2747.    if (info.hiz.bo) {
  2748.       zs->payload[8] = info.hiz.stride - 1;
  2749.       zs->payload[9] = info.hiz.offset;
  2750.  
  2751.       /* do not increment reference count */
  2752.       zs->hiz_bo = info.hiz.bo;
  2753.    }
  2754.    else {
  2755.       zs->payload[8] = 0;
  2756.       zs->payload[9] = 0;
  2757.       zs->hiz_bo = NULL;
  2758.    }
  2759. }
  2760.  
  2761. static void
  2762. gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
  2763.                                const struct ilo_zs_surface *zs,
  2764.                                struct ilo_cp *cp)
  2765. {
  2766.    const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
  2767.       ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
  2768.    const uint8_t cmd_len = 7;
  2769.  
  2770.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2771.  
  2772.    ilo_cp_begin(cp, cmd_len);
  2773.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2774.    ilo_cp_write(cp, zs->payload[0]);
  2775.    ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
  2776.          INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
  2777.    ilo_cp_write(cp, zs->payload[2]);
  2778.    ilo_cp_write(cp, zs->payload[3]);
  2779.    ilo_cp_write(cp, zs->payload[4]);
  2780.    ilo_cp_write(cp, zs->payload[5]);
  2781.    ilo_cp_end(cp);
  2782. }
  2783.  
  2784. static void
  2785. gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
  2786.                                       int x_offset, int y_offset,
  2787.                                       struct ilo_cp *cp)
  2788. {
  2789.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
  2790.    const uint8_t cmd_len = 2;
  2791.  
  2792.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2793.    assert(x_offset >= 0 && x_offset <= 31);
  2794.    assert(y_offset >= 0 && y_offset <= 31);
  2795.  
  2796.    ilo_cp_begin(cp, cmd_len);
  2797.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2798.    ilo_cp_write(cp, x_offset << 8 | y_offset);
  2799.    ilo_cp_end(cp);
  2800. }
  2801.  
  2802. static void
  2803. gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
  2804.                                        const struct pipe_poly_stipple *pattern,
  2805.                                        struct ilo_cp *cp)
  2806. {
  2807.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
  2808.    const uint8_t cmd_len = 33;
  2809.    int i;
  2810.  
  2811.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2812.    assert(Elements(pattern->stipple) == 32);
  2813.  
  2814.    ilo_cp_begin(cp, cmd_len);
  2815.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2816.    for (i = 0; i < 32; i++)
  2817.       ilo_cp_write(cp, pattern->stipple[i]);
  2818.    ilo_cp_end(cp);
  2819. }
  2820.  
  2821. static void
  2822. gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
  2823.                                unsigned pattern, unsigned factor,
  2824.                                struct ilo_cp *cp)
  2825. {
  2826.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
  2827.    const uint8_t cmd_len = 3;
  2828.    unsigned inverse;
  2829.  
  2830.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2831.    assert((pattern & 0xffff) == pattern);
  2832.    assert(factor >= 1 && factor <= 256);
  2833.  
  2834.    ilo_cp_begin(cp, cmd_len);
  2835.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2836.    ilo_cp_write(cp, pattern);
  2837.  
  2838.    if (dev->gen >= ILO_GEN(7)) {
  2839.       /* in U1.16 */
  2840.       inverse = (unsigned) (65536.0f / factor);
  2841.       ilo_cp_write(cp, inverse << 15 | factor);
  2842.    }
  2843.    else {
  2844.       /* in U1.13 */
  2845.       inverse = (unsigned) (8192.0f / factor);
  2846.       ilo_cp_write(cp, inverse << 16 | factor);
  2847.    }
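   /*
    * Worked example (illustration only): factor = 4 yields an inverse
    * repeat count of 65536 / 4 = 16384 in U1.16 on GEN7, or
    * 8192 / 4 = 2048 in U1.13 on GEN6, packed alongside the factor itself.
    */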
  2848.  
  2849.    ilo_cp_end(cp);
  2850. }
  2851.  
  2852. static void
  2853. gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
  2854.                                      struct ilo_cp *cp)
  2855. {
  2856.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
  2857.    const uint8_t cmd_len = 3;
  2858.  
  2859.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2860.  
  2861.    ilo_cp_begin(cp, cmd_len);
  2862.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2863.    ilo_cp_write(cp, 0 << 16 | 0);
  2864.    ilo_cp_write(cp, 0 << 16 | 0);
  2865.    ilo_cp_end(cp);
  2866. }
  2867.  
  2868. static void
  2869. gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
  2870.                                int index, unsigned svbi,
  2871.                                unsigned max_svbi,
  2872.                                bool load_vertex_count,
  2873.                                struct ilo_cp *cp)
  2874. {
  2875.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
  2876.    const uint8_t cmd_len = 4;
  2877.    uint32_t dw1;
  2878.  
  2879.    ILO_GPE_VALID_GEN(dev, 6, 6);
  2880.    assert(index >= 0 && index < 4);
  2881.  
  2882.    dw1 = index << SVB_INDEX_SHIFT;
  2883.    if (load_vertex_count)
  2884.       dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
  2885.  
  2886.    ilo_cp_begin(cp, cmd_len);
  2887.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2888.    ilo_cp_write(cp, dw1);
  2889.    ilo_cp_write(cp, svbi);
  2890.    ilo_cp_write(cp, max_svbi);
  2891.    ilo_cp_end(cp);
  2892. }
  2893.  
  2894. static void
  2895. gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
  2896.                               int num_samples,
  2897.                               const uint32_t *packed_sample_pos,
  2898.                               bool pixel_location_center,
  2899.                               struct ilo_cp *cp)
  2900. {
  2901.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
  2902.    const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
  2903.    uint32_t dw1, dw2, dw3;
  2904.  
  2905.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2906.  
  2907.    dw1 = (pixel_location_center) ?
  2908.       MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
  2909.  
  2910.    switch (num_samples) {
  2911.    case 0:
  2912.    case 1:
  2913.       dw1 |= MS_NUMSAMPLES_1;
  2914.       dw2 = 0;
  2915.       dw3 = 0;
  2916.       break;
  2917.    case 4:
  2918.       dw1 |= MS_NUMSAMPLES_4;
  2919.       dw2 = packed_sample_pos[0];
  2920.       dw3 = 0;
  2921.       break;
  2922.    case 8:
  2923.       assert(dev->gen >= ILO_GEN(7));
  2924.       dw1 |= MS_NUMSAMPLES_8;
  2925.       dw2 = packed_sample_pos[0];
  2926.       dw3 = packed_sample_pos[1];
  2927.       break;
  2928.    default:
  2929.       assert(!"unsupported sample count");
  2930.       dw1 |= MS_NUMSAMPLES_1;
  2931.       dw2 = 0;
  2932.       dw3 = 0;
  2933.       break;
  2934.    }
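   /*
    * Usage sketch (illustration only): num_samples = 4 on GEN6 emits the
    * 3-dword form with dw1 = MS_PIXEL_LOCATION_CENTER | MS_NUMSAMPLES_4
    * (assuming centered pixel location) and dw2 = packed_sample_pos[0];
    * DW3 exists only on GEN7, which also allows the 8x case above.
    */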
  2935.  
  2936.    ilo_cp_begin(cp, cmd_len);
  2937.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2938.    ilo_cp_write(cp, dw1);
  2939.    ilo_cp_write(cp, dw2);
  2940.    if (dev->gen >= ILO_GEN(7))
  2941.       ilo_cp_write(cp, dw3);
  2942.    ilo_cp_end(cp);
  2943. }
  2944.  
  2945. static void
  2946. gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
  2947.                                  const struct ilo_zs_surface *zs,
  2948.                                  struct ilo_cp *cp)
  2949. {
  2950.    const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
  2951.       ILO_GPE_CMD(0x3, 0x0, 0x06) :
  2952.       ILO_GPE_CMD(0x3, 0x1, 0x0e);
  2953.    const uint8_t cmd_len = 3;
  2954.  
  2955.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2956.  
  2957.    ilo_cp_begin(cp, cmd_len);
  2958.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2959.    /* see ilo_gpe_init_zs_surface() */
  2960.    ilo_cp_write(cp, zs->payload[6]);
  2961.    ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
  2962.          INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
  2963.    ilo_cp_end(cp);
  2964. }
  2965.  
  2966. static void
  2967. gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
  2968.                                     const struct ilo_zs_surface *zs,
  2969.                                     struct ilo_cp *cp)
  2970. {
  2971.    const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
  2972.       ILO_GPE_CMD(0x3, 0x0, 0x07) :
  2973.       ILO_GPE_CMD(0x3, 0x1, 0x0f);
  2974.    const uint8_t cmd_len = 3;
  2975.  
  2976.    ILO_GPE_VALID_GEN(dev, 6, 7);
  2977.  
  2978.    ilo_cp_begin(cp, cmd_len);
  2979.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  2980.    /* see ilo_gpe_init_zs_surface() */
  2981.    ilo_cp_write(cp, zs->payload[8]);
  2982.    ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
  2983.          INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
  2984.    ilo_cp_end(cp);
  2985. }
  2986.  
  2987. static void
  2988. gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
  2989.                                uint32_t clear_val,
  2990.                                struct ilo_cp *cp)
  2991. {
  2992.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
  2993.    const uint8_t cmd_len = 2;
  2994.  
  2995.    ILO_GPE_VALID_GEN(dev, 6, 6);
  2996.  
  2997.    ilo_cp_begin(cp, cmd_len);
  2998.    ilo_cp_write(cp, cmd | (cmd_len - 2) |
  2999.                     GEN5_DEPTH_CLEAR_VALID);
  3000.    ilo_cp_write(cp, clear_val);
  3001.    ilo_cp_end(cp);
  3002. }
  3003.  
  3004. static void
  3005. gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
  3006.                        uint32_t dw1,
  3007.                        struct intel_bo *bo, uint32_t bo_offset,
  3008.                        bool write_qword,
  3009.                        struct ilo_cp *cp)
  3010. {
  3011.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
  3012.    const uint8_t cmd_len = (write_qword) ? 5 : 4;
  3013.    const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
  3014.    const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
  3015.  
  3016.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3017.  
  3018.    if (dw1 & PIPE_CONTROL_CS_STALL) {
  3019.       /*
  3020.        * From the Sandy Bridge PRM, volume 2 part 1, page 73:
  3021.        *
  3022.        *     "1 of the following must also be set (when CS stall is set):
  3023.        *
  3024.        *       * Depth Cache Flush Enable ([0] of DW1)
  3025.        *       * Stall at Pixel Scoreboard ([1] of DW1)
  3026.        *       * Depth Stall ([13] of DW1)
  3027.        *       * Post-Sync Operation ([13] of DW1)
  3028.        *       * Render Target Cache Flush Enable ([12] of DW1)
  3029.        *       * Notify Enable ([8] of DW1)"
  3030.        *
  3031.        * From the Ivy Bridge PRM, volume 2 part 1, page 61:
  3032.        *
  3033.        *     "One of the following must also be set (when CS stall is set):
  3034.        *
  3035.        *       * Render Target Cache Flush Enable ([12] of DW1)
  3036.        *       * Depth Cache Flush Enable ([0] of DW1)
  3037.        *       * Stall at Pixel Scoreboard ([1] of DW1)
  3038.        *       * Depth Stall ([13] of DW1)
  3039.        *       * Post-Sync Operation ([13] of DW1)"
  3040.        */
  3041.       uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
  3042.                           PIPE_CONTROL_DEPTH_CACHE_FLUSH |
  3043.                           PIPE_CONTROL_STALL_AT_SCOREBOARD |
  3044.                           PIPE_CONTROL_DEPTH_STALL;
  3045.  
  3046.       /* post-sync op */
  3047.       bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
  3048.                   PIPE_CONTROL_WRITE_DEPTH_COUNT |
  3049.                   PIPE_CONTROL_WRITE_TIMESTAMP;
  3050.  
  3051.       if (dev->gen == ILO_GEN(6))
  3052.          bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
  3053.  
  3054.       assert(dw1 & bit_test);
  3055.    }
  3056.  
  3057.    if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
  3058.       /*
  3059.        * From the Sandy Bridge PRM, volume 2 part 1, page 73:
  3060.        *
  3061.        *     "Following bits must be clear (when Depth Stall is set):
  3062.        *
  3063.        *       * Render Target Cache Flush Enable ([12] of DW1)
  3064.        *       * Depth Cache Flush Enable ([0] of DW1)"
  3065.        */
  3066.       assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
  3067.                       PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
  3068.    }
  3069.  
  3070.    ilo_cp_begin(cp, cmd_len);
  3071.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  3072.    ilo_cp_write(cp, dw1);
  3073.    ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
  3074.    ilo_cp_write(cp, 0);
  3075.    if (write_qword)
  3076.       ilo_cp_write(cp, 0);
  3077.    ilo_cp_end(cp);
  3078. }
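/*
 * Illustrative sketch, not part of the original code: a caller that only
 * needs a command streamer stall could satisfy the bit_test assertion above
 * by also setting the pixel-scoreboard stall, e.g.
 *
 *    gen6_emit_PIPE_CONTROL(dev,
 *          PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD,
 *          NULL, 0, false, cp);
 *
 * This assumes a NULL write target is acceptable when no post-sync write is
 * requested; the flags a real caller needs depend on what it is flushing.
 */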
  3079.  
  3080. static void
  3081. gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
  3082.                       const struct pipe_draw_info *info,
  3083.                       const struct ilo_ib_state *ib,
  3084.                       bool rectlist,
  3085.                       struct ilo_cp *cp)
  3086. {
  3087.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
  3088.    const uint8_t cmd_len = 6;
  3089.    const int prim = (rectlist) ?
  3090.       _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
  3091.    const int vb_access = (info->indexed) ?
  3092.       GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
  3093.       GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
  3094.    const uint32_t vb_start = info->start +
  3095.       ((info->indexed) ? ib->draw_start_offset : 0);
  3096.  
  3097.    ILO_GPE_VALID_GEN(dev, 6, 6);
  3098.  
  3099.    ilo_cp_begin(cp, cmd_len);
  3100.    ilo_cp_write(cp, cmd | (cmd_len - 2) |
  3101.                     prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
  3102.                     vb_access);
  3103.    ilo_cp_write(cp, info->count);
  3104.    ilo_cp_write(cp, vb_start);
  3105.    ilo_cp_write(cp, info->instance_count);
  3106.    ilo_cp_write(cp, info->start_instance);
  3107.    ilo_cp_write(cp, info->index_bias);
  3108.    ilo_cp_end(cp);
  3109. }
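/*
 * Illustrative sketch, not part of the original code: a non-indexed draw of
 * a single triangle would reach this function with roughly
 *
 *    struct pipe_draw_info info;
 *    memset(&info, 0, sizeof(info));
 *    info.mode = PIPE_PRIM_TRIANGLES;
 *    info.count = 3;
 *    info.instance_count = 1;
 *
 *    gen6_emit_3DPRIMITIVE(dev, &info, NULL, false, cp);
 *
 * Since info.indexed is false, vertex buffer access is sequential and ib is
 * never dereferenced, so passing NULL for it is fine in this sketch.
 */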
  3110.  
  3111. static uint32_t
  3112. gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
  3113.                                     const struct ilo_shader_state **cs,
  3114.                                     uint32_t *sampler_state,
  3115.                                     int *num_samplers,
  3116.                                     uint32_t *binding_table_state,
  3117.                                     int *num_surfaces,
  3118.                                     int num_ids,
  3119.                                     struct ilo_cp *cp)
  3120. {
  3121.    /*
  3122.     * From the Sandy Bridge PRM, volume 2 part 2, page 34:
  3123.     *
  3124.     *     "(Interface Descriptor Total Length) This field must have the same
  3125.     *      alignment as the Interface Descriptor Data Start Address.
  3126.     *
  3127.     *      It must be DQWord (32-byte) aligned..."
  3128.     *
  3129.     * From the Sandy Bridge PRM, volume 2 part 2, page 35:
  3130.     *
  3131.     *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
  3132.     *      aligned address of the Interface Descriptor data."
  3133.     */
  3134.    const int state_align = 32 / 4;
  3135.    const int state_len = (32 / 4) * num_ids;
  3136.    uint32_t state_offset, *dw;
  3137.    int i;
  3138.  
  3139.    ILO_GPE_VALID_GEN(dev, 6, 6);
  3140.  
  3141.    dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
  3142.          state_len, state_align, &state_offset);
  3143.  
  3144.    for (i = 0; i < num_ids; i++) {
  3145.       dw[0] = ilo_shader_get_kernel_offset(cs[i]);
  3146.       dw[1] = 1 << 18; /* SPF */
  3147.       dw[2] = sampler_state[i] |
  3148.               (num_samplers[i] + 3) / 4 << 2;
  3149.       dw[3] = binding_table_state[i] |
  3150.               num_surfaces[i];
  3151.       dw[4] = 0 << 16 |  /* CURBE Read Length */
  3152.               0;         /* CURBE Read Offset */
  3153.       dw[5] = 0; /* Barrier ID */
  3154.       dw[6] = 0;
  3155.       dw[7] = 0;
  3156.  
  3157.       dw += 8;
  3158.    }
  3159.  
  3160.    return state_offset;
  3161. }
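/*
 * Worked example, illustrative only: each interface descriptor above is 8
 * dwords (32 bytes), so with num_ids == 4 the function reserves
 * state_len == 32 dwords (128 bytes).  Both the start offset and the total
 * length are then 32-byte aligned, which is what the PRM quote in the
 * function requires.
 */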
  3162.  
  3163. static void
  3164. viewport_get_guardband(const struct ilo_dev_info *dev,
  3165.                        int center_x, int center_y,
  3166.                        int *min_gbx, int *max_gbx,
  3167.                        int *min_gby, int *max_gby)
  3168. {
  3169.    /*
  3170.     * From the Sandy Bridge PRM, volume 2 part 1, page 234:
  3171.     *
  3172.     *     "Per-Device Guardband Extents
  3173.     *
  3174.     *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
  3175.     *       - Maximum Post-Clamp Delta (X or Y): 16K"
  3176.     *
  3177.     *     "In addition, in order to be correctly rendered, objects must have a
  3178.     *      screenspace bounding box not exceeding 8K in the X or Y direction.
  3179.     *      This additional restriction must also be comprehended by software,
  3180.     *      i.e., enforced by use of clipping."
  3181.     *
  3182.     * From the Ivy Bridge PRM, volume 2 part 1, page 248:
  3183.     *
  3184.     *     "Per-Device Guardband Extents
  3185.     *
  3186.     *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
  3187.     *       - Maximum Post-Clamp Delta (X or Y): N/A"
  3188.     *
  3189.     *     "In addition, in order to be correctly rendered, objects must have a
  3190.     *      screenspace bounding box not exceeding 8K in the X or Y direction.
  3191.     *      This additional restriction must also be comprehended by software,
  3192.     *      i.e., enforced by use of clipping."
  3193.     *
  3194.     * Combined, the bounding box of any object cannot exceed 8K in either
  3195.     * width or height.
  3196.     *
  3197.     * Below we set the guardband to an 8K x 8K square centered at the
  3198.     * viewport's center.  This makes sure all objects passing the GB test
  3199.     * are valid to the renderer, and those failing the XY clipping have a
  3200.     * better chance of passing the GB test.
  3201.     */
  3202.    const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
  3203.    const int half_len = 8192 / 2;
  3204.  
  3205.    /* make sure the guardband is within the valid range */
  3206.    if (center_x - half_len < -max_extent)
  3207.       center_x = -max_extent + half_len;
  3208.    else if (center_x + half_len > max_extent - 1)
  3209.       center_x = max_extent - half_len;
  3210.  
  3211.    if (center_y - half_len < -max_extent)
  3212.       center_y = -max_extent + half_len;
  3213.    else if (center_y + half_len > max_extent - 1)
  3214.       center_y = max_extent - half_len;
  3215.  
  3216.    *min_gbx = center_x - half_len;
  3217.    *max_gbx = center_x + half_len;
  3218.    *min_gby = center_y - half_len;
  3219.    *max_gby = center_y + half_len;
  3220. }
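/*
 * Worked example, illustrative only: on GEN6, max_extent is 16384 and
 * half_len is 4096.  A viewport centered at (16000, 0) gets its center
 * clamped to x = 16384 - 4096 = 12288, so the guardband becomes
 * [8192, 16384] x [-4096, 4096] in screen space instead of spilling past
 * the supported extent.
 */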
  3221.  
  3222. void
  3223. ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
  3224.                          const struct pipe_viewport_state *state,
  3225.                          struct ilo_viewport_cso *vp)
  3226. {
  3227.    const float scale_x = fabs(state->scale[0]);
  3228.    const float scale_y = fabs(state->scale[1]);
  3229.    const float scale_z = fabs(state->scale[2]);
  3230.    int min_gbx, max_gbx, min_gby, max_gby;
  3231.  
  3232.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3233.  
  3234.    viewport_get_guardband(dev,
  3235.          (int) state->translate[0],
  3236.          (int) state->translate[1],
  3237.          &min_gbx, &max_gbx, &min_gby, &max_gby);
  3238.  
  3239.    /* matrix form */
  3240.    vp->m00 = state->scale[0];
  3241.    vp->m11 = state->scale[1];
  3242.    vp->m22 = state->scale[2];
  3243.    vp->m30 = state->translate[0];
  3244.    vp->m31 = state->translate[1];
  3245.    vp->m32 = state->translate[2];
  3246.  
  3247.    /* guardband in NDC space */
  3248.    vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
  3249.    vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
  3250.    vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
  3251.    vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
  3252.  
  3253.    /* viewport in screen space */
  3254.    vp->min_x = scale_x * -1.0f + state->translate[0];
  3255.    vp->max_x = scale_x *  1.0f + state->translate[0];
  3256.    vp->min_y = scale_y * -1.0f + state->translate[1];
  3257.    vp->max_y = scale_y *  1.0f + state->translate[1];
  3258.    vp->min_z = scale_z * -1.0f + state->translate[2];
  3259.    vp->max_z = scale_z *  1.0f + state->translate[2];
  3260. }
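/*
 * Worked example, illustrative only: with the common (un-flipped) mapping of
 * a 1024x768 window, scale = (512.0, 384.0, 0.5) and
 * translate = (512.0, 384.0, 0.5), the code above gives
 * min_x = -512 + 512 = 0, max_x = 1024, min_y = 0, max_y = 768,
 * min_z = 0.0, and max_z = 1.0, while the guardband computed in screen space
 * is converted back to NDC by undoing the same translate/scale.
 */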
  3261.  
  3262. static uint32_t
  3263. gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
  3264.                       const struct ilo_viewport_cso *viewports,
  3265.                       unsigned num_viewports,
  3266.                       struct ilo_cp *cp)
  3267. {
  3268.    const int state_align = 32 / 4;
  3269.    const int state_len = 8 * num_viewports;
  3270.    uint32_t state_offset, *dw;
  3271.    unsigned i;
  3272.  
  3273.    ILO_GPE_VALID_GEN(dev, 6, 6);
  3274.  
  3275.    /*
  3276.     * From the Sandy Bridge PRM, volume 2 part 1, page 262:
  3277.     *
  3278.     *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
  3279.     *      stored as an array of up to 16 elements..."
  3280.     */
  3281.    assert(num_viewports && num_viewports <= 16);
  3282.  
  3283.    dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
  3284.          state_len, state_align, &state_offset);
  3285.  
  3286.    for (i = 0; i < num_viewports; i++) {
  3287.       const struct ilo_viewport_cso *vp = &viewports[i];
  3288.  
  3289.       dw[0] = fui(vp->m00);
  3290.       dw[1] = fui(vp->m11);
  3291.       dw[2] = fui(vp->m22);
  3292.       dw[3] = fui(vp->m30);
  3293.       dw[4] = fui(vp->m31);
  3294.       dw[5] = fui(vp->m32);
  3295.       dw[6] = 0;
  3296.       dw[7] = 0;
  3297.  
  3298.       dw += 8;
  3299.    }
  3300.  
  3301.    return state_offset;
  3302. }
  3303.  
  3304. static uint32_t
  3305. gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
  3306.                         const struct ilo_viewport_cso *viewports,
  3307.                         unsigned num_viewports,
  3308.                         struct ilo_cp *cp)
  3309. {
  3310.    const int state_align = 32 / 4;
  3311.    const int state_len = 4 * num_viewports;
  3312.    uint32_t state_offset, *dw;
  3313.    unsigned i;
  3314.  
  3315.    ILO_GPE_VALID_GEN(dev, 6, 6);
  3316.  
  3317.    /*
  3318.     * From the Sandy Bridge PRM, volume 2 part 1, page 193:
  3319.     *
  3320.     *     "The viewport-related state is stored as an array of up to 16
  3321.     *      elements..."
  3322.     */
  3323.    assert(num_viewports && num_viewports <= 16);
  3324.  
  3325.    dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
  3326.          state_len, state_align, &state_offset);
  3327.  
  3328.    for (i = 0; i < num_viewports; i++) {
  3329.       const struct ilo_viewport_cso *vp = &viewports[i];
  3330.  
  3331.       dw[0] = fui(vp->min_gbx);
  3332.       dw[1] = fui(vp->max_gbx);
  3333.       dw[2] = fui(vp->min_gby);
  3334.       dw[3] = fui(vp->max_gby);
  3335.  
  3336.       dw += 4;
  3337.    }
  3338.  
  3339.    return state_offset;
  3340. }
  3341.  
  3342. static uint32_t
  3343. gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
  3344.                       const struct ilo_viewport_cso *viewports,
  3345.                       unsigned num_viewports,
  3346.                       struct ilo_cp *cp)
  3347. {
  3348.    const int state_align = 32 / 4;
  3349.    const int state_len = 2 * num_viewports;
  3350.    uint32_t state_offset, *dw;
  3351.    unsigned i;
  3352.  
  3353.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3354.  
  3355.    /*
  3356.     * From the Sandy Bridge PRM, volume 2 part 1, page 385:
  3357.     *
  3358.     *     "The viewport state is stored as an array of up to 16 elements..."
  3359.     */
  3360.    assert(num_viewports && num_viewports <= 16);
  3361.  
  3362.    dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
  3363.          state_len, state_align, &state_offset);
  3364.  
  3365.    for (i = 0; i < num_viewports; i++) {
  3366.       const struct ilo_viewport_cso *vp = &viewports[i];
  3367.  
  3368.       dw[0] = fui(vp->min_z);
  3369.       dw[1] = fui(vp->max_z);
  3370.  
  3371.       dw += 2;
  3372.    }
  3373.  
  3374.    return state_offset;
  3375. }
  3376.  
  3377. static uint32_t
  3378. gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
  3379.                            const struct pipe_stencil_ref *stencil_ref,
  3380.                            float alpha_ref,
  3381.                            const struct pipe_blend_color *blend_color,
  3382.                            struct ilo_cp *cp)
  3383. {
  3384.    const int state_align = 64 / 4;
  3385.    const int state_len = 6;
  3386.    uint32_t state_offset, *dw;
  3387.  
  3388.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3389.  
  3390.    dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
  3391.          state_len, state_align, &state_offset);
  3392.  
  3393.    dw[0] = stencil_ref->ref_value[0] << 24 |
  3394.            stencil_ref->ref_value[1] << 16 |
  3395.            BRW_ALPHATEST_FORMAT_UNORM8;
  3396.    dw[1] = float_to_ubyte(alpha_ref);
  3397.    dw[2] = fui(blend_color->color[0]);
  3398.    dw[3] = fui(blend_color->color[1]);
  3399.    dw[4] = fui(blend_color->color[2]);
  3400.    dw[5] = fui(blend_color->color[3]);
  3401.  
  3402.    return state_offset;
  3403. }
  3404.  
  3405. static int
  3406. gen6_blend_factor_dst_alpha_forced_one(int factor)
  3407. {
  3408.    switch (factor) {
  3409.    case BRW_BLENDFACTOR_DST_ALPHA:
  3410.       return BRW_BLENDFACTOR_ONE;
  3411.    case BRW_BLENDFACTOR_INV_DST_ALPHA:
  3412.    case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
  3413.       return BRW_BLENDFACTOR_ZERO;
  3414.    default:
  3415.       return factor;
  3416.    }
  3417. }
  3418.  
  3419. static uint32_t
  3420. blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
  3421.                           const struct pipe_rt_blend_state *rt,
  3422.                           bool dst_alpha_forced_one)
  3423. {
  3424.    int rgb_src, rgb_dst, a_src, a_dst;
  3425.    uint32_t dw;
  3426.  
  3427.    if (!rt->blend_enable)
  3428.       return 0;
  3429.  
  3430.    rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
  3431.    rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
  3432.    a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
  3433.    a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
  3434.  
  3435.    if (dst_alpha_forced_one) {
  3436.       rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
  3437.       rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
  3438.       a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
  3439.       a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
  3440.    }
  3441.  
  3442.    dw = 1 << 31 |
  3443.         gen6_translate_pipe_blend(rt->alpha_func) << 26 |
  3444.         a_src << 20 |
  3445.         a_dst << 15 |
  3446.         gen6_translate_pipe_blend(rt->rgb_func) << 11 |
  3447.         rgb_src << 5 |
  3448.         rgb_dst;
  3449.  
  3450.    if (rt->rgb_func != rt->alpha_func ||
  3451.        rgb_src != a_src || rgb_dst != a_dst)
  3452.       dw |= 1 << 30;
  3453.  
  3454.    return dw;
  3455. }
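/*
 * Illustrative sketch, not part of the original code: standard back-to-front
 * alpha blending would arrive here as (using the stock Gallium enums)
 *
 *    struct pipe_rt_blend_state rt;
 *    memset(&rt, 0, sizeof(rt));
 *    rt.blend_enable = 1;
 *    rt.rgb_func = rt.alpha_func = PIPE_BLEND_ADD;
 *    rt.rgb_src_factor = rt.alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
 *    rt.rgb_dst_factor = rt.alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
 *
 * The RGB and alpha functions and factors match, so bit 30 stays clear, and
 * dst_alpha_forced_one leaves the factors untouched because none of them
 * reads destination alpha.
 */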
  3456.  
  3457. void
  3458. ilo_gpe_init_blend(const struct ilo_dev_info *dev,
  3459.                    const struct pipe_blend_state *state,
  3460.                    struct ilo_blend_state *blend)
  3461. {
  3462.    unsigned num_cso, i;
  3463.  
  3464.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3465.  
  3466.    if (state->independent_blend_enable) {
  3467.       num_cso = Elements(blend->cso);
  3468.    }
  3469.    else {
  3470.       memset(blend->cso, 0, sizeof(blend->cso));
  3471.       num_cso = 1;
  3472.    }
  3473.  
  3474.    blend->independent_blend_enable = state->independent_blend_enable;
  3475.    blend->alpha_to_coverage = state->alpha_to_coverage;
  3476.    blend->dual_blend = false;
  3477.  
  3478.    for (i = 0; i < num_cso; i++) {
  3479.       const struct pipe_rt_blend_state *rt = &state->rt[i];
  3480.       struct ilo_blend_cso *cso = &blend->cso[i];
  3481.       bool dual_blend;
  3482.  
  3483.       cso->payload[0] = 0;
  3484.       cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
  3485.                             0x3;
  3486.  
  3487.       if (!(rt->colormask & PIPE_MASK_A))
  3488.          cso->payload[1] |= 1 << 27;
  3489.       if (!(rt->colormask & PIPE_MASK_R))
  3490.          cso->payload[1] |= 1 << 26;
  3491.       if (!(rt->colormask & PIPE_MASK_G))
  3492.          cso->payload[1] |= 1 << 25;
  3493.       if (!(rt->colormask & PIPE_MASK_B))
  3494.          cso->payload[1] |= 1 << 24;
  3495.  
  3496.       if (state->dither)
  3497.          cso->payload[1] |= 1 << 12;
  3498.  
  3499.       /*
  3500.        * From the Sandy Bridge PRM, volume 2 part 1, page 365:
  3501.        *
  3502.        *     "Color Buffer Blending and Logic Ops must not be enabled
  3503.        *      simultaneously, or behavior is UNDEFINED."
  3504.        *
  3505.        * Since state->logicop_enable takes precedence over rt->blend_enable,
  3506.        * no special care is needed.
  3507.        */
  3508.       if (state->logicop_enable) {
  3509.          cso->dw_logicop = 1 << 22 |
  3510.             gen6_translate_pipe_logicop(state->logicop_func) << 18;
  3511.  
  3512.          cso->dw_blend = 0;
  3513.          cso->dw_blend_dst_alpha_forced_one = 0;
  3514.  
  3515.          dual_blend = false;
  3516.       }
  3517.       else {
  3518.          cso->dw_logicop = 0;
  3519.  
  3520.          cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
  3521.          cso->dw_blend_dst_alpha_forced_one =
  3522.             blend_get_rt_blend_enable(dev, rt, true);
  3523.  
  3524.          dual_blend = (rt->blend_enable &&
  3525.                util_blend_state_is_dual(state, i));
  3526.       }
  3527.  
  3528.       cso->dw_alpha_mod = 0;
  3529.  
  3530.       if (state->alpha_to_coverage) {
  3531.          cso->dw_alpha_mod |= 1 << 31;
  3532.  
  3533.          if (dev->gen >= ILO_GEN(7))
  3534.             cso->dw_alpha_mod |= 1 << 29;
  3535.       }
  3536.  
  3537.       /*
  3538.        * From the Sandy Bridge PRM, volume 2 part 1, page 378:
  3539.        *
  3540.        *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
  3541.        *      must be disabled."
  3542.        */
  3543.       if (state->alpha_to_one && !dual_blend)
  3544.          cso->dw_alpha_mod |= 1 << 30;
  3545.  
  3546.       if (dual_blend)
  3547.          blend->dual_blend = true;
  3548.    }
  3549. }
  3550.  
  3551. static uint32_t
  3552. gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
  3553.                       const struct ilo_blend_state *blend,
  3554.                       const struct ilo_fb_state *fb,
  3555.                       const struct pipe_alpha_state *alpha,
  3556.                       struct ilo_cp *cp)
  3557. {
  3558.    const int state_align = 64 / 4;
  3559.    int state_len;
  3560.    uint32_t state_offset, *dw;
  3561.    unsigned num_targets, i;
  3562.  
  3563.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3564.  
  3565.    /*
  3566.     * From the Sandy Bridge PRM, volume 2 part 1, page 376:
  3567.     *
  3568.     *     "The blend state is stored as an array of up to 8 elements..."
  3569.     */
  3570.    num_targets = fb->state.nr_cbufs;
  3571.    assert(num_targets <= 8);
  3572.  
  3573.    if (!num_targets) {
  3574.       if (!alpha->enabled)
  3575.          return 0;
  3576.       /* to be able to reference alpha func */
  3577.       num_targets = 1;
  3578.    }
  3579.  
  3580.    state_len = 2 * num_targets;
  3581.  
  3582.    dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
  3583.          state_len, state_align, &state_offset);
  3584.  
  3585.    for (i = 0; i < num_targets; i++) {
  3586.       const unsigned idx = (blend->independent_blend_enable) ? i : 0;
  3587.       const struct ilo_blend_cso *cso = &blend->cso[idx];
  3588.       const int num_samples = fb->num_samples;
  3589.       const struct util_format_description *format_desc =
  3590.          (idx < fb->state.nr_cbufs) ?
  3591.          util_format_description(fb->state.cbufs[idx]->format) : NULL;
  3592.       bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
  3593.  
  3594.       rt_is_unorm = true;
  3595.       rt_is_pure_integer = false;
  3596.       rt_dst_alpha_forced_one = false;
  3597.  
  3598.       if (format_desc) {
  3599.          int ch;
  3600.  
  3601.          switch (format_desc->format) {
  3602.          case PIPE_FORMAT_B8G8R8X8_UNORM:
  3603.             /* force alpha to one when the HW format has alpha */
  3604.             assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
  3605.                   == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
  3606.             rt_dst_alpha_forced_one = true;
  3607.             break;
  3608.          default:
  3609.             break;
  3610.          }
  3611.  
  3612.          for (ch = 0; ch < 4; ch++) {
  3613.             if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
  3614.                continue;
  3615.  
  3616.             if (format_desc->channel[ch].pure_integer) {
  3617.                rt_is_unorm = false;
  3618.                rt_is_pure_integer = true;
  3619.                break;
  3620.             }
  3621.  
  3622.             if (!format_desc->channel[ch].normalized ||
  3623.                 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
  3624.                rt_is_unorm = false;
  3625.          }
  3626.       }
  3627.  
  3628.       dw[0] = cso->payload[0];
  3629.       dw[1] = cso->payload[1];
  3630.  
  3631.       if (!rt_is_pure_integer) {
  3632.          if (rt_dst_alpha_forced_one)
  3633.             dw[0] |= cso->dw_blend_dst_alpha_forced_one;
  3634.          else
  3635.             dw[0] |= cso->dw_blend;
  3636.       }
  3637.  
  3638.       /*
  3639.        * From the Sandy Bridge PRM, volume 2 part 1, page 365:
  3640.        *
  3641.        *     "Logic Ops are only supported on *_UNORM surfaces (excluding
  3642.        *      _SRGB variants), otherwise Logic Ops must be DISABLED."
  3643.        *
  3644.        * Since logicop is ignored for non-UNORM color buffers, no special care
  3645.        * is needed.
  3646.        */
  3647.       if (rt_is_unorm)
  3648.          dw[1] |= cso->dw_logicop;
  3649.  
  3650.       /*
  3651.        * From the Sandy Bridge PRM, volume 2 part 1, page 356:
  3652.        *
  3653.        *     "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
  3654.        *      Dither both must be disabled."
  3655.        *
  3656.        * There is no such limitation on GEN7, nor is there one for AlphaToOne.
  3657.        * But GL requires the same behavior anyway.
  3658.        */
  3659.       if (num_samples > 1)
  3660.          dw[1] |= cso->dw_alpha_mod;
  3661.  
  3662.       /*
  3663.        * From the Sandy Bridge PRM, volume 2 part 1, page 382:
  3664.        *
  3665.        *     "Alpha Test can only be enabled if Pixel Shader outputs a float
  3666.        *      alpha value."
  3667.        */
  3668.       if (alpha->enabled && !rt_is_pure_integer) {
  3669.          dw[1] |= 1 << 16 |
  3670.                   gen6_translate_dsa_func(alpha->func) << 13;
  3671.       }
  3672.  
  3673.       dw += 2;
  3674.    }
  3675.  
  3676.    return state_offset;
  3677. }
  3678.  
  3679. void
  3680. ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
  3681.                  const struct pipe_depth_stencil_alpha_state *state,
  3682.                  struct ilo_dsa_state *dsa)
  3683. {
  3684.    const struct pipe_depth_state *depth = &state->depth;
  3685.    const struct pipe_stencil_state *stencil0 = &state->stencil[0];
  3686.    const struct pipe_stencil_state *stencil1 = &state->stencil[1];
  3687.    uint32_t *dw;
  3688.  
  3689.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3690.  
  3691.    /* copy alpha state for later use */
  3692.    dsa->alpha = state->alpha;
  3693.  
  3694.    STATIC_ASSERT(Elements(dsa->payload) >= 3);
  3695.    dw = dsa->payload;
  3696.  
  3697.    /*
  3698.     * From the Sandy Bridge PRM, volume 2 part 1, page 359:
  3699.     *
  3700.     *     "If the Depth Buffer is either undefined or does not have a surface
  3701.     *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
  3702.     *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
  3703.     *
  3704.     * From the Sandy Bridge PRM, volume 2 part 1, page 370:
  3705.     *
  3706.     *     "This field (Stencil Test Enable) cannot be enabled if
  3707.     *      Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
  3708.     *
  3709.     * TODO We do not check these yet.
  3710.     */
  3711.    if (stencil0->enabled) {
  3712.       dw[0] = 1 << 31 |
  3713.               gen6_translate_dsa_func(stencil0->func) << 28 |
  3714.               gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
  3715.               gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
  3716.               gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
  3717.       if (stencil0->writemask)
  3718.          dw[0] |= 1 << 18;
  3719.  
  3720.       dw[1] = stencil0->valuemask << 24 |
  3721.               stencil0->writemask << 16;
  3722.  
  3723.       if (stencil1->enabled) {
  3724.          dw[0] |= 1 << 15 |
  3725.                   gen6_translate_dsa_func(stencil1->func) << 12 |
  3726.                   gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
  3727.                   gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
  3728.                   gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
  3729.          if (stencil1->writemask)
  3730.             dw[0] |= 1 << 18;
  3731.  
  3732.          dw[1] |= stencil1->valuemask << 8 |
  3733.                   stencil1->writemask;
  3734.       }
  3735.    }
  3736.    else {
  3737.       dw[0] = 0;
  3738.       dw[1] = 0;
  3739.    }
  3740.  
  3741.    /*
  3742.     * From the Sandy Bridge PRM, volume 2 part 1, page 360:
  3743.     *
  3744.     *     "Enabling the Depth Test function without defining a Depth Buffer is
  3745.     *      UNDEFINED."
  3746.     *
  3747.     * From the Sandy Bridge PRM, volume 2 part 1, page 375:
  3748.     *
  3749.     *     "A Depth Buffer must be defined before enabling writes to it, or
  3750.     *      operation is UNDEFINED."
  3751.     *
  3752.     * TODO We do not check these yet.
  3753.     */
  3754.    dw[2] = depth->enabled << 31 |
  3755.            depth->writemask << 26;
  3756.    if (depth->enabled)
  3757.       dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
  3758.    else
  3759.       dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
  3760. }
  3761.  
  3762. static uint32_t
  3763. gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
  3764.                               const struct ilo_dsa_state *dsa,
  3765.                               struct ilo_cp *cp)
  3766. {
  3767.    const int state_align = 64 / 4;
  3768.    const int state_len = 3;
  3769.    uint32_t state_offset, *dw;
  3770.  
  3771.  
  3772.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3773.  
  3774.    dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
  3775.          state_len, state_align, &state_offset);
  3776.  
  3777.    dw[0] = dsa->payload[0];
  3778.    dw[1] = dsa->payload[1];
  3779.    dw[2] = dsa->payload[2];
  3780.  
  3781.    return state_offset;
  3782. }
  3783.  
  3784. void
  3785. ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
  3786.                     unsigned start_slot,
  3787.                     unsigned num_states,
  3788.                     const struct pipe_scissor_state *states,
  3789.                     struct ilo_scissor_state *scissor)
  3790. {
  3791.    unsigned i;
  3792.  
  3793.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3794.  
  3795.    for (i = 0; i < num_states; i++) {
  3796.       uint16_t min_x, min_y, max_x, max_y;
  3797.  
  3798.       /* both max and min are inclusive in SCISSOR_RECT */
  3799.       if (states[i].minx < states[i].maxx &&
  3800.           states[i].miny < states[i].maxy) {
  3801.          min_x = states[i].minx;
  3802.          min_y = states[i].miny;
  3803.          max_x = states[i].maxx - 1;
  3804.          max_y = states[i].maxy - 1;
  3805.       }
  3806.       else {
  3807.          /* we have to make min greater than max */
  3808.          min_x = 1;
  3809.          min_y = 1;
  3810.          max_x = 0;
  3811.          max_y = 0;
  3812.       }
  3813.  
  3814.       scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
  3815.       scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
  3816.    }
  3817.  
  3818.    if (!start_slot && num_states)
  3819.       scissor->scissor0 = states[0];
  3820. }
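/*
 * Worked example, illustrative only: a scissor of minx = 0, miny = 0,
 * maxx = 100, maxy = 100 is stored as the inclusive rectangle
 * (0, 0)-(99, 99).  A degenerate scissor (e.g. maxx == minx) takes the other
 * branch and is stored as min (1, 1), max (0, 0); with min greater than max
 * nothing passes the scissor test.
 */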
  3821.  
  3822. void
  3823. ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
  3824.                          struct ilo_scissor_state *scissor)
  3825. {
  3826.    unsigned i;
  3827.  
  3828.    for (i = 0; i < Elements(scissor->payload); i += 2) {
  3829.       scissor->payload[i + 0] = 1 << 16 | 1;
  3830.       scissor->payload[i + 1] = 0;
  3831.    }
  3832. }
  3833.  
  3834. static uint32_t
  3835. gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
  3836.                        const struct ilo_scissor_state *scissor,
  3837.                        unsigned num_viewports,
  3838.                        struct ilo_cp *cp)
  3839. {
  3840.    const int state_align = 32 / 4;
  3841.    const int state_len = 2 * num_viewports;
  3842.    uint32_t state_offset, *dw;
  3843.  
  3844.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3845.  
  3846.    /*
  3847.     * From the Sandy Bridge PRM, volume 2 part 1, page 263:
  3848.     *
  3849.     *     "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
  3850.     *      stored as an array of up to 16 elements..."
  3851.     */
  3852.    assert(num_viewports && num_viewports <= 16);
  3853.  
  3854.    dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
  3855.          state_len, state_align, &state_offset);
  3856.  
  3857.    memcpy(dw, scissor->payload, state_len * 4);
  3858.  
  3859.    return state_offset;
  3860. }
  3861.  
  3862. static uint32_t
  3863. gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
  3864.                               uint32_t *surface_states,
  3865.                               int num_surface_states,
  3866.                               struct ilo_cp *cp)
  3867. {
  3868.    const int state_align = 32 / 4;
  3869.    const int state_len = num_surface_states;
  3870.    uint32_t state_offset, *dw;
  3871.  
  3872.    ILO_GPE_VALID_GEN(dev, 6, 7);
  3873.  
  3874.    /*
  3875.     * From the Sandy Bridge PRM, volume 4 part 1, page 69:
  3876.     *
  3877.     *     "It is stored as an array of up to 256 elements..."
  3878.     */
  3879.    assert(num_surface_states <= 256);
  3880.  
  3881.    if (!num_surface_states)
  3882.       return 0;
  3883.  
  3884.    dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
  3885.          state_len, state_align, &state_offset);
  3886.    memcpy(dw, surface_states,
  3887.          num_surface_states * sizeof(surface_states[0]));
  3888.  
  3889.    return state_offset;
  3890. }
  3891.  
  3892. void
  3893. ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
  3894.                                     unsigned width, unsigned height,
  3895.                                     unsigned depth, unsigned level,
  3896.                                     struct ilo_view_surface *surf)
  3897. {
  3898.    uint32_t *dw;
  3899.  
  3900.    ILO_GPE_VALID_GEN(dev, 6, 6);
  3901.  
  3902.    /*
  3903.     * From the Sandy Bridge PRM, volume 4 part 1, page 71:
  3904.     *
  3905.     *     "A null surface will be used in instances where an actual surface is
  3906.     *      not bound. When a write message is generated to a null surface, no
  3907.     *      actual surface is written to. When a read message (including any
  3908.     *      sampling engine message) is generated to a null surface, the result
  3909.     *      is all zeros. Note that a null surface type is allowed to be used
  3910.     *      with all messages, even if it is not specifically indicated as
  3911.     *      supported. All of the remaining fields in surface state are ignored
  3912.     *      for null surfaces, with the following exceptions:
  3913.     *
  3914.     *        * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
  3915.     *          depth buffer's corresponding state for all render target
  3916.     *          surfaces, including null.
  3917.     *        * Surface Format must be R8G8B8A8_UNORM."
  3918.     *
  3919.     * From the Sandy Bridge PRM, volume 4 part 1, page 82:
  3920.     *
  3921.     *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
  3922.     *      true"
  3923.     */
  3924.  
  3925.    STATIC_ASSERT(Elements(surf->payload) >= 6);
  3926.    dw = surf->payload;
  3927.  
  3928.    dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
  3929.            BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
  3930.  
  3931.    dw[1] = 0;
  3932.  
  3933.    dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
  3934.            (width  - 1) << BRW_SURFACE_WIDTH_SHIFT |
  3935.            level << BRW_SURFACE_LOD_SHIFT;
  3936.  
  3937.    dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
  3938.            BRW_SURFACE_TILED;
  3939.  
  3940.    dw[4] = 0;
  3941.    dw[5] = 0;
  3942.  
  3943.    surf->bo = NULL;
  3944. }
  3945.  
  3946. void
  3947. ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
  3948.                                           const struct ilo_buffer *buf,
  3949.                                           unsigned offset, unsigned size,
  3950.                                           unsigned struct_size,
  3951.                                           enum pipe_format elem_format,
  3952.                                           bool is_rt, bool render_cache_rw,
  3953.                                           struct ilo_view_surface *surf)
  3954. {
  3955.    const int elem_size = util_format_get_blocksize(elem_format);
  3956.    int width, height, depth, pitch;
  3957.    int surface_format, num_entries;
  3958.    uint32_t *dw;
  3959.  
  3960.    ILO_GPE_VALID_GEN(dev, 6, 6);
  3961.  
  3962.    /*
  3963.     * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
  3964.     * structure in a buffer.
  3965.     */
  3966.  
  3967.    surface_format = ilo_translate_color_format(elem_format);
  3968.  
  3969.    num_entries = size / struct_size;
  3970.    /* see if there is enough space to fit another element */
  3971.    if (size % struct_size >= elem_size)
  3972.       num_entries++;
  3973.  
  3974.    /*
  3975.     * From the Sandy Bridge PRM, volume 4 part 1, page 76:
  3976.     *
  3977.     *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
  3978.     *      Address) specifies the base address of first element of the
  3979.     *      surface. The surface is interpreted as a simple array of that
  3980.     *      single element type. The address must be naturally-aligned to the
  3981.     *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
  3982.     *      must be 16-byte aligned).
  3983.     *
  3984.     *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
  3985.     *      the base address of the first element of the surface, computed in
  3986.     *      software by adding the surface base address to the byte offset of
  3987.     *      the element in the buffer."
  3988.     */
  3989.    if (is_rt)
  3990.       assert(offset % elem_size == 0);
  3991.  
  3992.    /*
  3993.     * From the Sandy Bridge PRM, volume 4 part 1, page 77:
  3994.     *
  3995.     *     "For buffer surfaces, the number of entries in the buffer ranges
  3996.     *      from 1 to 2^27."
  3997.     */
  3998.    assert(num_entries >= 1 && num_entries <= 1 << 27);
  3999.  
  4000.    /*
  4001.     * From the Sandy Bridge PRM, volume 4 part 1, page 81:
  4002.     *
  4003.     *     "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
  4004.     *      indicates the size of the structure."
  4005.     */
  4006.    pitch = struct_size;
  4007.  
  4008.    pitch--;
  4009.    num_entries--;
  4010.    /* bits [6:0] */
  4011.    width  = (num_entries & 0x0000007f);
  4012.    /* bits [19:7] */
  4013.    height = (num_entries & 0x000fff80) >> 7;
  4014.    /* bits [26:20] */
  4015.    depth  = (num_entries & 0x07f00000) >> 20;
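   /*
    * Worked example, illustrative only: size = 1008, struct_size = 16 and
    * elem_size = 4 give num_entries = 63 (no extra element since
    * 1008 % 16 == 0); after the decrement, 62 fits entirely in the width
    * field.  A count of 1000 entries (999 after the decrement) splits into
    * width = 103, height = 7, depth = 0, since 7 * 128 + 103 == 999.
    */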
  4016.  
  4017.    STATIC_ASSERT(Elements(surf->payload) >= 6);
  4018.    dw = surf->payload;
  4019.  
  4020.    dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
  4021.            surface_format << BRW_SURFACE_FORMAT_SHIFT;
  4022.    if (render_cache_rw)
  4023.       dw[0] |= BRW_SURFACE_RC_READ_WRITE;
  4024.  
  4025.    dw[1] = offset;
  4026.  
  4027.    dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
  4028.            width << BRW_SURFACE_WIDTH_SHIFT;
  4029.  
  4030.    dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
  4031.            pitch << BRW_SURFACE_PITCH_SHIFT;
  4032.  
  4033.    dw[4] = 0;
  4034.    dw[5] = 0;
  4035.  
  4036.    /* do not increment reference count */
  4037.    surf->bo = buf->bo;
  4038. }
  4039.  
  4040. void
  4041. ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
  4042.                                            const struct ilo_texture *tex,
  4043.                                            enum pipe_format format,
  4044.                                            unsigned first_level,
  4045.                                            unsigned num_levels,
  4046.                                            unsigned first_layer,
  4047.                                            unsigned num_layers,
  4048.                                            bool is_rt, bool render_cache_rw,
  4049.                                            struct ilo_view_surface *surf)
  4050. {
  4051.    int surface_type, surface_format;
  4052.    int width, height, depth, pitch, lod;
  4053.    unsigned layer_offset, x_offset, y_offset;
  4054.    uint32_t *dw;
  4055.  
  4056.    ILO_GPE_VALID_GEN(dev, 6, 6);
  4057.  
  4058.    surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
  4059.    assert(surface_type != BRW_SURFACE_BUFFER);
  4060.  
  4061.    if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
  4062.       format = PIPE_FORMAT_Z32_FLOAT;
  4063.  
  4064.    if (is_rt)
  4065.       surface_format = ilo_translate_render_format(format);
  4066.    else
  4067.       surface_format = ilo_translate_texture_format(format);
  4068.    assert(surface_format >= 0);
  4069.  
  4070.    width = tex->base.width0;
  4071.    height = tex->base.height0;
  4072.    depth = (tex->base.target == PIPE_TEXTURE_3D) ?
  4073.       tex->base.depth0 : num_layers;
  4074.    pitch = tex->bo_stride;
  4075.  
  4076.    if (surface_type == BRW_SURFACE_CUBE) {
  4077.       /*
  4078.        * From the Sandy Bridge PRM, volume 4 part 1, page 81:
  4079.        *
  4080.        *     "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
  4081.        *      range of this field (Depth) is [0,84], indicating the number of
  4082.        *      cube array elements (equal to the number of underlying 2D array
  4083.        *      elements divided by 6). For other surfaces, this field must be
  4084.        *      zero."
  4085.        *
  4086.        * When is_rt is true, we treat the texture as a 2D one to avoid the
  4087.        * restriction.
  4088.        */
  4089.       if (is_rt) {
  4090.          surface_type = BRW_SURFACE_2D;
  4091.       }
  4092.       else {
  4093.          assert(num_layers % 6 == 0);
  4094.          depth = num_layers / 6;
  4095.       }
  4096.    }
  4097.  
  4098.    /* sanity check the size */
  4099.    assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
  4100.    switch (surface_type) {
  4101.    case BRW_SURFACE_1D:
  4102.       assert(width <= 8192 && height == 1 && depth <= 512);
  4103.       assert(first_layer < 512 && num_layers <= 512);
  4104.       break;
  4105.    case BRW_SURFACE_2D:
  4106.       assert(width <= 8192 && height <= 8192 && depth <= 512);
  4107.       assert(first_layer < 512 && num_layers <= 512);
  4108.       break;
  4109.    case BRW_SURFACE_3D:
  4110.       assert(width <= 2048 && height <= 2048 && depth <= 2048);
  4111.       assert(first_layer < 2048 && num_layers <= 512);
  4112.       if (!is_rt)
  4113.          assert(first_layer == 0);
  4114.       break;
  4115.    case BRW_SURFACE_CUBE:
  4116.       assert(width <= 8192 && height <= 8192 && depth <= 85);
  4117.       assert(width == height);
  4118.       assert(first_layer < 512 && num_layers <= 512);
  4119.       if (is_rt)
  4120.          assert(first_layer == 0);
  4121.       break;
  4122.    default:
  4123.       assert(!"unexpected surface type");
  4124.       break;
  4125.    }
  4126.  
  4127.    /* non-full array spacing is supported only on GEN7+ */
  4128.    assert(tex->array_spacing_full);
  4129.    /* non-interleaved samples are supported only on GEN7+ */
  4130.    if (tex->base.nr_samples > 1)
  4131.       assert(tex->interleaved);
  4132.  
  4133.    if (is_rt) {
  4134.       /*
  4135.        * Compute the offset to the layer manually.
  4136.        *
  4137.        * For rendering, the hardware requires LOD to be the same for all
  4138.        * render targets and the depth buffer.  We need to compute the offset
  4139.        * to the layer manually and always set LOD to 0.
  4140.        */
  4141.       if (true) {
  4142.          /* we lose the capability for layered rendering */
  4143.          assert(num_layers == 1);
  4144.  
  4145.          layer_offset = ilo_texture_get_slice_offset(tex,
  4146.                first_level, first_layer, &x_offset, &y_offset);
  4147.  
  4148.          assert(x_offset % 4 == 0);
  4149.          assert(y_offset % 2 == 0);
  4150.          x_offset /= 4;
  4151.          y_offset /= 2;
  4152.  
  4153.          /* derive the size for the LOD */
  4154.          width = u_minify(width, first_level);
  4155.          height = u_minify(height, first_level);
  4156.          if (surface_type == BRW_SURFACE_3D)
  4157.             depth = u_minify(depth, first_level);
  4158.          else
  4159.             depth = 1;
  4160.  
  4161.          first_level = 0;
  4162.          first_layer = 0;
  4163.          lod = 0;
  4164.       }
  4165.       else {
  4166.          layer_offset = 0;
  4167.          x_offset = 0;
  4168.          y_offset = 0;
  4169.       }
  4170.  
  4171.       assert(num_levels == 1);
  4172.       lod = first_level;
  4173.    }
  4174.    else {
  4175.       layer_offset = 0;
  4176.       x_offset = 0;
  4177.       y_offset = 0;
  4178.  
  4179.       lod = num_levels - 1;
  4180.    }
  4181.  
  4182.    /*
  4183.     * From the Sandy Bridge PRM, volume 4 part 1, page 76:
  4184.     *
  4185.     *     "Linear render target surface base addresses must be element-size
  4186.     *      aligned, for non-YUV surface formats, or a multiple of 2
  4187.     *      element-sizes for YUV surface formats. Other linear surfaces have
  4188.     *      no alignment requirements (byte alignment is sufficient.)"
  4189.     *
  4190.     * From the Sandy Bridge PRM, volume 4 part 1, page 81:
  4191.     *
  4192.     *     "For linear render target surfaces, the pitch must be a multiple
  4193.     *      of the element size for non-YUV surface formats. Pitch must be a
  4194.     *      multiple of 2 * element size for YUV surface formats."
  4195.     *
  4196.     * From the Sandy Bridge PRM, volume 4 part 1, page 86:
  4197.     *
  4198.     *     "For linear surfaces, this field (X Offset) must be zero"
  4199.     */
  4200.    if (tex->tiling == INTEL_TILING_NONE) {
  4201.       if (is_rt) {
  4202.          const int elem_size = util_format_get_blocksize(format);
  4203.          assert(layer_offset % elem_size == 0);
  4204.          assert(pitch % elem_size == 0);
  4205.       }
  4206.  
  4207.       assert(!x_offset);
  4208.    }
  4209.  
  4210.    STATIC_ASSERT(Elements(surf->payload) >= 6);
  4211.    dw = surf->payload;
  4212.  
  4213.    dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
  4214.            surface_format << BRW_SURFACE_FORMAT_SHIFT |
  4215.            BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
  4216.  
  4217.    if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
  4218.       dw[0] |= 1 << 9 |
  4219.                BRW_SURFACE_CUBEFACE_ENABLES;
  4220.    }
  4221.  
  4222.    if (render_cache_rw)
  4223.       dw[0] |= BRW_SURFACE_RC_READ_WRITE;
  4224.  
  4225.    dw[1] = layer_offset;
  4226.  
  4227.    dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
  4228.            (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
  4229.            lod << BRW_SURFACE_LOD_SHIFT;
  4230.  
  4231.    dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
  4232.            (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
  4233.            ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
  4234.  
  4235.    dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
  4236.            first_layer << 17 |
  4237.            (num_layers - 1) << 8 |
  4238.            ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
  4239.                                          BRW_SURFACE_MULTISAMPLECOUNT_1);
  4240.  
  4241.    dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
  4242.            y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
  4243.    if (tex->valign_4)
  4244.       dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
  4245.  
  4246.    /* do not increment reference count */
  4247.    surf->bo = tex->bo;
  4248. }
  4249.  
  4250. static uint32_t
  4251. gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
  4252.                         const struct ilo_view_surface *surf,
  4253.                         bool for_render,
  4254.                         struct ilo_cp *cp)
  4255. {
  4256.    const int state_align = 32 / 4;
  4257.    const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
  4258.    uint32_t state_offset;
  4259.    uint32_t read_domains, write_domain;
  4260.  
  4261.    ILO_GPE_VALID_GEN(dev, 6, 7);
  4262.  
  4263.    if (for_render) {
  4264.       read_domains = INTEL_DOMAIN_RENDER;
  4265.       write_domain = INTEL_DOMAIN_RENDER;
  4266.    }
  4267.    else {
  4268.       read_domains = INTEL_DOMAIN_SAMPLER;
  4269.       write_domain = 0;
  4270.    }
  4271.  
  4272.    ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
  4273.  
  4274.    STATIC_ASSERT(Elements(surf->payload) >= 8);
  4275.  
  4276.    ilo_cp_write(cp, surf->payload[0]);
  4277.    ilo_cp_write_bo(cp, surf->payload[1],
  4278.          surf->bo, read_domains, write_domain);
  4279.    ilo_cp_write(cp, surf->payload[2]);
  4280.    ilo_cp_write(cp, surf->payload[3]);
  4281.    ilo_cp_write(cp, surf->payload[4]);
  4282.    ilo_cp_write(cp, surf->payload[5]);
  4283.  
  4284.    if (dev->gen >= ILO_GEN(7)) {
  4285.       ilo_cp_write(cp, surf->payload[6]);
  4286.       ilo_cp_write(cp, surf->payload[7]);
  4287.    }
  4288.  
  4289.    ilo_cp_end(cp);
  4290.  
  4291.    return state_offset;
  4292. }
  4293.  
  4294. static uint32_t
  4295. gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
  4296.                            const struct pipe_stream_output_target *so,
  4297.                            const struct pipe_stream_output_info *so_info,
  4298.                            int so_index,
  4299.                            struct ilo_cp *cp)
  4300. {
  4301.    struct ilo_buffer *buf = ilo_buffer(so->buffer);
  4302.    unsigned bo_offset, struct_size;
  4303.    enum pipe_format elem_format;
  4304.    struct ilo_view_surface surf;
  4305.  
  4306.    ILO_GPE_VALID_GEN(dev, 6, 6);
  4307.  
  4308.    bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
  4309.    struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
  4310.  
  4311.    switch (so_info->output[so_index].num_components) {
  4312.    case 1:
  4313.       elem_format = PIPE_FORMAT_R32_FLOAT;
  4314.       break;
  4315.    case 2:
  4316.       elem_format = PIPE_FORMAT_R32G32_FLOAT;
  4317.       break;
  4318.    case 3:
  4319.       elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
  4320.       break;
  4321.    case 4:
  4322.       elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
  4323.       break;
  4324.    default:
  4325.       assert(!"unexpected SO components length");
  4326.       elem_format = PIPE_FORMAT_R32_FLOAT;
  4327.       break;
  4328.    }
  4329.  
  4330.    ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
  4331.          struct_size, elem_format, false, true, &surf);
  4332.  
  4333.    return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
  4334. }
  4335.  
  4336. static void
  4337. sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
  4338.                                const union pipe_color_union *color,
  4339.                                uint32_t *dw, int num_dwords)
  4340. {
  4341.    float rgba[4] = {
  4342.       color->f[0], color->f[1], color->f[2], color->f[3],
  4343.    };
  4344.  
  4345.    ILO_GPE_VALID_GEN(dev, 6, 6);
  4346.  
  4347.    assert(num_dwords >= 12);
  4348.  
  4349.    /*
  4350.     * This state is not documented in the Sandy Bridge PRM, but in the
  4351.     * Ironlake PRM.  SNORM8 seems to be in DW11 instead of DW1.
  4352.     */
  4353.  
  4354.    /* IEEE_FP */
  4355.    dw[1] = fui(rgba[0]);
  4356.    dw[2] = fui(rgba[1]);
  4357.    dw[3] = fui(rgba[2]);
  4358.    dw[4] = fui(rgba[3]);
  4359.  
  4360.    /* FLOAT_16 */
  4361.    dw[5] = util_float_to_half(rgba[0]) |
  4362.            util_float_to_half(rgba[1]) << 16;
  4363.    dw[6] = util_float_to_half(rgba[2]) |
  4364.            util_float_to_half(rgba[3]) << 16;
  4365.  
  4366.    /* clamp to [-1.0f, 1.0f] */
  4367.    rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
  4368.    rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
  4369.    rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
  4370.    rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
  4371.  
  4372.    /* SNORM16 */
  4373.    dw[9] =  (int16_t) util_iround(rgba[0] * 32767.0f) |
  4374.             (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
  4375.    dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
  4376.             (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
  4377.  
  4378.    /* SNORM8 */
  4379.    dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
  4380.             (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
  4381.             (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
  4382.             (int8_t) util_iround(rgba[3] * 127.0f) << 24;
  4383.  
  4384.    /* clamp to [0.0f, 1.0f] */
  4385.    rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
  4386.    rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
  4387.    rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
  4388.    rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
  4389.  
  4390.    /* UNORM8 */
  4391.    dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
  4392.            (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
  4393.            (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
  4394.            (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
  4395.  
  4396.    /* UNORM16 */
  4397.    dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
  4398.            (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
  4399.    dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
  4400.            (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
  4401. }
  4402.  
  4403. void
  4404. ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
  4405.                          const struct pipe_sampler_state *state,
  4406.                          struct ilo_sampler_cso *sampler)
  4407. {
  4408.    int mip_filter, min_filter, mag_filter, max_aniso;
  4409.    int lod_bias, max_lod, min_lod;
  4410.    int wrap_s, wrap_t, wrap_r, wrap_cube;
  4411.    bool clamp_is_to_edge;
  4412.    uint32_t dw0, dw1, dw3;
  4413.  
  4414.    ILO_GPE_VALID_GEN(dev, 6, 7);
  4415.  
  4416.    memset(sampler, 0, sizeof(*sampler));
  4417.  
  4418.    mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
  4419.    min_filter = gen6_translate_tex_filter(state->min_img_filter);
  4420.    mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
  4421.  
  4422.    sampler->anisotropic = state->max_anisotropy;
  4423.  
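   /*
    * The hardware encodes the maximum anisotropy ratio as (ratio / 2 - 1):
    * 2 maps to BRW_ANISORATIO_2 (0) and 16 to BRW_ANISORATIO_16 (7), so a
    * max_anisotropy of 8, for example, becomes 3.
    */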
  4424.    if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
  4425.       max_aniso = state->max_anisotropy / 2 - 1;
  4426.    else if (state->max_anisotropy > 16)
  4427.       max_aniso = BRW_ANISORATIO_16;
  4428.    else
  4429.       max_aniso = BRW_ANISORATIO_2;
  4430.  
  4431.    /*
  4432.     *
  4433.     * Here is how the hardware calculates per-pixel LOD, from my reading of the
  4434.     * PRMs:
  4435.     *
  4436.     *  1) LOD is set to log2(ratio of texels to pixels) if not specified in
  4437.     *     other ways.  The number of texels is measured using level
  4438.     *     SurfMinLod.
  4439.     *  2) Bias is added to LOD.
  4440.     *  3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
  4441.     *     compared with Base to determine whether magnification or
  4442.     *     minification is needed.  (if preclamp is disabled, LOD is compared
  4443.     *     with Base before clamping)
  4444.     *  4) If magnification is needed, or no mipmapping is requested, LOD is
  4445.     *     set to floor(MinLod).
  4446.     *  5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
  4447.     *
  4448.     * With the Gallium interface, Base is always zero and
  4449.     * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
  4450.     */
  4451.    if (dev->gen >= ILO_GEN(7)) {
  4452.       const float scale = 256.0f;
  4453.  
  4454.       /* [-16.0, 16.0) in S4.8 */
  4455.       lod_bias = (int)
  4456.          (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
  4457.       lod_bias &= 0x1fff;
  4458.  
  4459.       /* [0.0, 14.0] in U4.8 */
  4460.       max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
  4461.       min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
  4462.    }
  4463.    else {
  4464.       const float scale = 64.0f;
  4465.  
  4466.       /* [-16.0, 16.0) in S4.6 */
  4467.       lod_bias = (int)
  4468.          (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
  4469.       lod_bias &= 0x7ff;
  4470.  
  4471.       /* [0.0, 13.0] in U4.6 */
  4472.       max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
  4473.       min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
  4474.    }
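
   /*
    * For example, a lod_bias of 2.5 encodes as 2.5 * 256 = 0x280 in S4.8 on
    * Gen7 and as 2.5 * 64 = 0xa0 in S4.6 on Gen6; a bias of -1.0 masks to
    * 0x1f00 and 0x7c0 respectively.
    */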
  4475.  
  4476.    /*
  4477.     * We want the clamped LOD to determine magnification/minification, and we
  4478.     * want LOD set to zero when magnifying or when mipmapping is disabled.
  4479.     * The hardware would set LOD to floor(MinLod) and that is a problem when
  4480.     * MinLod is greater than or equal to 1.0f.
  4481.     *
  4482.     * With Base being zero, it is always minification when MinLod is non-zero.
  4483.     * To achieve our goal, we just need to set MinLod to zero and set
  4484.     * MagFilter to MinFilter when mipmapping is disabled.
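    *
    * Without this, the hardware would, e.g., sample at LOD floor(2.0) = 2
    * when min_lod is 2.0 and mipmapping is disabled, instead of at the
    * view's first level as the state tracker expects.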
  4485.     */
  4486.    if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
  4487.       min_lod = 0;
  4488.       mag_filter = min_filter;
  4489.    }
  4490.  
  4491.    /*
  4492.     * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
  4493.     * PIPE_TEX_WRAP_CLAMP_TO_EDGE;  for linear filtering, PIPE_TEX_WRAP_CLAMP
  4494.     * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
  4495.     * texture coordinates to [0.0, 1.0].
  4496.     *
  4497.     * The clamping will be taken care of in the shaders.  There are two
  4498.     * filters here, but let the minification one have the final say.
  4499.     */
  4500.    clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
  4501.    if (!clamp_is_to_edge) {
  4502.       sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
  4503.       sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
  4504.       sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
  4505.    }
  4506.  
  4507.    /* determine wrap s/t/r */
  4508.    wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
  4509.    wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
  4510.    wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);
  4511.  
  4512.    /*
  4513.     * From the Sandy Bridge PRM, volume 4 part 1, page 107:
  4514.     *
  4515.     *     "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
  4516.     *      and TEXCOORDMODE_CUBE settings are valid, and each TC component
  4517.     *      must have the same Address Control mode."
  4518.     *
  4519.     * From the Ivy Bridge PRM, volume 4 part 1, page 96:
  4520.     *
  4521.     *     "This field (Cube Surface Control Mode) must be set to
  4522.     *      CUBECTRLMODE_PROGRAMMED"
  4523.     *
  4524.     * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
  4525.     * map filtering.
  4526.     */
  4527.    if (state->seamless_cube_map &&
  4528.        (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
  4529.         state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
  4530.       wrap_cube = BRW_TEXCOORDMODE_CUBE;
  4531.    }
  4532.    else {
  4533.       wrap_cube = BRW_TEXCOORDMODE_CLAMP;
  4534.    }
  4535.  
  4536.    if (!state->normalized_coords) {
  4537.       /*
  4538.        * From the Ivy Bridge PRM, volume 4 part 1, page 98:
  4539.        *
  4540.        *     "The following state must be set as indicated if this field
  4541.        *      (Non-normalized Coordinate Enable) is enabled:
  4542.        *
  4543.        *      - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
  4544.        *        TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
  4545.        *      - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
  4546.        *      - Mag Mode Filter must be MAPFILTER_NEAREST or
  4547.        *        MAPFILTER_LINEAR.
  4548.        *      - Min Mode Filter must be MAPFILTER_NEAREST or
  4549.        *        MAPFILTER_LINEAR.
  4550.        *      - Mip Mode Filter must be MIPFILTER_NONE.
  4551.        *      - Min LOD must be 0.
  4552.        *      - Max LOD must be 0.
  4553.        *      - MIP Count must be 0.
  4554.        *      - Surface Min LOD must be 0.
  4555.        *      - Texture LOD Bias must be 0."
  4556.        */
  4557.       assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
  4558.              wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
  4559.       assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
  4560.              wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
  4561.       assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
  4562.              wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
  4563.  
  4564.       assert(mag_filter == BRW_MAPFILTER_NEAREST ||
  4565.              mag_filter == BRW_MAPFILTER_LINEAR);
  4566.       assert(min_filter == BRW_MAPFILTER_NEAREST ||
  4567.              min_filter == BRW_MAPFILTER_LINEAR);
  4568.  
  4569.       /* work around a bug in util_blitter */
  4570.       mip_filter = BRW_MIPFILTER_NONE;
  4571.  
  4572.       assert(mip_filter == BRW_MIPFILTER_NONE);
  4573.    }
  4574.  
  4575.    if (dev->gen >= ILO_GEN(7)) {
  4576.       dw0 = 1 << 28 |
  4577.             mip_filter << 20 |
  4578.             lod_bias << 1;
  4579.  
  4580.       sampler->dw_filter = mag_filter << 17 |
  4581.                            min_filter << 14;
  4582.  
  4583.       sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
  4584.                                  BRW_MAPFILTER_ANISOTROPIC << 14 |
  4585.                                  1;
  4586.  
  4587.       dw1 = min_lod << 20 |
  4588.             max_lod << 8;
  4589.  
  4590.       if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
  4591.          dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
  4592.  
  4593.       dw3 = max_aniso << 19;
  4594.  
  4595.       /* round the coordinates for linear filtering */
  4596.       if (min_filter != BRW_MAPFILTER_NEAREST) {
  4597.          dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
  4598.                  BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
  4599.                  BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
  4600.       }
  4601.       if (mag_filter != BRW_MAPFILTER_NEAREST) {
  4602.          dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
  4603.                  BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
  4604.                  BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
  4605.       }
  4606.  
  4607.       if (!state->normalized_coords)
  4608.          dw3 |= 1 << 10;
  4609.  
  4610.       sampler->dw_wrap = wrap_s << 6 |
  4611.                          wrap_t << 3 |
  4612.                          wrap_r;
  4613.  
  4614.       /*
  4615.        * As noted in the classic i965 driver, the HW may still reference
  4616.        * wrap_t and wrap_r for 1D textures.  We need to set them to a safe
  4617.        * mode.
  4618.        */
  4619.       sampler->dw_wrap_1d = wrap_s << 6 |
  4620.                             BRW_TEXCOORDMODE_WRAP << 3 |
  4621.                             BRW_TEXCOORDMODE_WRAP;
  4622.  
  4623.       sampler->dw_wrap_cube = wrap_cube << 6 |
  4624.                               wrap_cube << 3 |
  4625.                               wrap_cube;
  4626.  
  4627.       STATIC_ASSERT(Elements(sampler->payload) >= 7);
  4628.  
  4629.       sampler->payload[0] = dw0;
  4630.       sampler->payload[1] = dw1;
  4631.       sampler->payload[2] = dw3;
  4632.  
  4633.       memcpy(&sampler->payload[3],
  4634.             state->border_color.ui, sizeof(state->border_color.ui));
  4635.    }
  4636.    else {
  4637.       dw0 = 1 << 28 |
  4638.             mip_filter << 20 |
  4639.             lod_bias << 3;
  4640.  
  4641.       if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
  4642.          dw0 |= gen6_translate_shadow_func(state->compare_func);
  4643.  
  4644.       sampler->dw_filter = (min_filter != mag_filter) << 27 |
  4645.                            mag_filter << 17 |
  4646.                            min_filter << 14;
  4647.  
  4648.       sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
  4649.                                  BRW_MAPFILTER_ANISOTROPIC << 14;
  4650.  
  4651.       dw1 = min_lod << 22 |
  4652.             max_lod << 12;
  4653.  
  4654.       sampler->dw_wrap = wrap_s << 6 |
  4655.                          wrap_t << 3 |
  4656.                          wrap_r;
  4657.  
  4658.       sampler->dw_wrap_1d = wrap_s << 6 |
  4659.                             BRW_TEXCOORDMODE_WRAP << 3 |
  4660.                             BRW_TEXCOORDMODE_WRAP;
  4661.  
  4662.       sampler->dw_wrap_cube = wrap_cube << 6 |
  4663.                               wrap_cube << 3 |
  4664.                               wrap_cube;
  4665.  
  4666.       dw3 = max_aniso << 19;
  4667.  
  4668.       /* round the coordinates for linear filtering */
  4669.       if (min_filter != BRW_MAPFILTER_NEAREST) {
  4670.          dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
  4671.                  BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
  4672.                  BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
  4673.       }
  4674.       if (mag_filter != BRW_MAPFILTER_NEAREST) {
  4675.          dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
  4676.                  BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
  4677.                  BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
  4678.       }
  4679.  
  4680.       if (!state->normalized_coords)
  4681.          dw3 |= 1;
  4682.  
  4683.       STATIC_ASSERT(Elements(sampler->payload) >= 15);
  4684.  
  4685.       sampler->payload[0] = dw0;
  4686.       sampler->payload[1] = dw1;
  4687.       sampler->payload[2] = dw3;
  4688.  
  4689.       sampler_init_border_color_gen6(dev,
  4690.             &state->border_color, &sampler->payload[3], 12);
  4691.    }
  4692. }
  4693.  
  4694. static uint32_t
  4695. gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
  4696.                         const struct ilo_sampler_cso * const *samplers,
  4697.                         const struct pipe_sampler_view * const *views,
  4698.                         const uint32_t *sampler_border_colors,
  4699.                         int num_samplers,
  4700.                         struct ilo_cp *cp)
  4701. {
  4702.    const int state_align = 32 / 4;
  4703.    const int state_len = 4 * num_samplers;
  4704.    uint32_t state_offset, *dw;
  4705.    int i;
  4706.  
  4707.    ILO_GPE_VALID_GEN(dev, 6, 7);
  4708.  
  4709.    /*
  4710.     * From the Sandy Bridge PRM, volume 4 part 1, page 101:
  4711.     *
  4712.     *     "The sampler state is stored as an array of up to 16 elements..."
  4713.     */
  4714.    assert(num_samplers <= 16);
  4715.  
  4716.    if (!num_samplers)
  4717.       return 0;
  4718.  
  4719.    dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
  4720.          state_len, state_align, &state_offset);
  4721.  
  4722.    for (i = 0; i < num_samplers; i++) {
  4723.       const struct ilo_sampler_cso *sampler = samplers[i];
  4724.       const struct pipe_sampler_view *view = views[i];
  4725.       const uint32_t border_color = sampler_border_colors[i];
  4726.       uint32_t dw_filter, dw_wrap;
  4727.  
  4728.       /* there may be holes */
  4729.       if (!sampler || !view) {
  4730.          /* disabled sampler */
  4731.          dw[0] = 1 << 31;
  4732.          dw[1] = 0;
  4733.          dw[2] = 0;
  4734.          dw[3] = 0;
  4735.          dw += 4;
  4736.  
  4737.          continue;
  4738.       }
  4739.  
  4740.       /* determine filter and wrap modes */
  4741.       switch (view->texture->target) {
  4742.       case PIPE_TEXTURE_1D:
  4743.          dw_filter = (sampler->anisotropic) ?
  4744.             sampler->dw_filter_aniso : sampler->dw_filter;
  4745.          dw_wrap = sampler->dw_wrap_1d;
  4746.          break;
  4747.       case PIPE_TEXTURE_3D:
  4748.          /*
  4749.           * From the Sandy Bridge PRM, volume 4 part 1, page 103:
  4750.           *
  4751.           *     "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
  4752.           *      surfaces of type SURFTYPE_3D."
  4753.           */
  4754.          dw_filter = sampler->dw_filter;
  4755.          dw_wrap = sampler->dw_wrap;
  4756.          break;
  4757.       case PIPE_TEXTURE_CUBE:
  4758.          dw_filter = (sampler->anisotropic) ?
  4759.             sampler->dw_filter_aniso : sampler->dw_filter;
  4760.          dw_wrap = sampler->dw_wrap_cube;
  4761.          break;
  4762.       default:
  4763.          dw_filter = (sampler->anisotropic) ?
  4764.             sampler->dw_filter_aniso : sampler->dw_filter;
  4765.          dw_wrap = sampler->dw_wrap;
  4766.          break;
  4767.       }
  4768.  
  4769.       dw[0] = sampler->payload[0];
  4770.       dw[1] = sampler->payload[1];
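      /*
       * SAMPLER_BORDER_COLOR_STATE is allocated with 32-byte alignment, so
       * the offset stored in DW2 must have its low five bits clear.
       */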
  4771.       assert(!(border_color & 0x1f));
  4772.       dw[2] = border_color;
  4773.       dw[3] = sampler->payload[2];
  4774.  
  4775.       dw[0] |= dw_filter;
  4776.  
  4777.       if (dev->gen >= ILO_GEN(7)) {
  4778.          dw[3] |= dw_wrap;
  4779.       }
  4780.       else {
  4781.          /*
  4782.           * From the Sandy Bridge PRM, volume 4 part 1, page 21:
  4783.           *
  4784.           *     "[DevSNB] Errata: Incorrect behavior is observed in cases
  4785.           *      where the min and mag mode filters are different and
  4786.           *      SurfMinLOD is nonzero. The determination of MagMode uses the
  4787.           *      following equation instead of the one in the above
  4788.           *      pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
  4789.           *
  4790.           * As a way to work around that, we set Base to
  4791.           * view->u.tex.first_level.
  4792.           */
  4793.          dw[0] |= view->u.tex.first_level << 22;
  4794.  
  4795.          dw[1] |= dw_wrap;
  4796.       }
  4797.  
  4798.       dw += 4;
  4799.    }
  4800.  
  4801.    return state_offset;
  4802. }
  4803.  
  4804. static uint32_t
  4805. gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
  4806.                                      const struct ilo_sampler_cso *sampler,
  4807.                                      struct ilo_cp *cp)
  4808. {
  4809.    const int state_align = 32 / 4;
  4810.    const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
  4811.    uint32_t state_offset, *dw;
  4812.  
  4813.    ILO_GPE_VALID_GEN(dev, 6, 7);
  4814.  
  4815.    dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
  4816.          state_len, state_align, &state_offset);
  4817.  
  4818.    /* see ilo_gpe_init_sampler_cso() */
  4819.    memcpy(dw, &sampler->payload[3], state_len * 4);
  4820.  
  4821.    return state_offset;
  4822. }
  4823.  
  4824. static uint32_t
  4825. gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
  4826.                                int size, void **pcb,
  4827.                                struct ilo_cp *cp)
  4828. {
  4829.    /*
  4830.     * All VS, GS, FS, and CS push constant buffers must be aligned to 32
  4831.     * bytes, and their sizes are specified in 256-bit units.
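    *
    * For example, a 100-byte constant block pads to align(100, 32) = 128
    * bytes, which is 32 DWords or four 256-bit units.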
  4832.     */
  4833.    const int state_align = 32 / 4;
  4834.    const int state_len = align(size, 32) / 4;
  4835.    uint32_t state_offset;
  4836.    char *buf;
  4837.  
  4838.    ILO_GPE_VALID_GEN(dev, 6, 7);
  4839.  
  4840.    buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
  4841.          state_len, state_align, &state_offset);
  4842.  
  4843.    /* zero out the unused range */
  4844.    if (size < state_len * 4)
  4845.       memset(&buf[size], 0, state_len * 4 - size);
  4846.  
  4847.    if (pcb)
  4848.       *pcb = buf;
  4849.  
  4850.    return state_offset;
  4851. }
  4852.  
  4853. static int
  4854. gen6_estimate_command_size(const struct ilo_dev_info *dev,
  4855.                            enum ilo_gpe_gen6_command cmd,
  4856.                            int arg)
  4857. {
  4858.    static const struct {
  4859.       int header;
  4860.       int body;
  4861.    } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
  4862.       [ILO_GPE_GEN6_STATE_BASE_ADDRESS]                       = { 0,  10 },
  4863.       [ILO_GPE_GEN6_STATE_SIP]                                = { 0,  2  },
  4864.       [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS]                    = { 0,  1  },
  4865.       [ILO_GPE_GEN6_PIPELINE_SELECT]                          = { 0,  1  },
  4866.       [ILO_GPE_GEN6_MEDIA_VFE_STATE]                          = { 0,  8  },
  4867.       [ILO_GPE_GEN6_MEDIA_CURBE_LOAD]                         = { 0,  4  },
  4868.       [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD]          = { 0,  4  },
  4869.       [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE]                      = { 0,  2  },
  4870.       [ILO_GPE_GEN6_MEDIA_STATE_FLUSH]                        = { 0,  2  },
  4871.       [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER]                      = { 17, 1  },
  4872.       [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS]           = { 0,  4  },
  4873.       [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS]           = { 0,  4  },
  4874.       [ILO_GPE_GEN6_3DSTATE_URB]                              = { 0,  3  },
  4875.       [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS]                   = { 1,  4  },
  4876.       [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS]                  = { 1,  2  },
  4877.       [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER]                     = { 0,  3  },
  4878.       [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS]          = { 0,  4  },
  4879.       [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS]                = { 0,  4  },
  4880.       [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS]           = { 0,  2  },
  4881.       [ILO_GPE_GEN6_3DSTATE_VS]                               = { 0,  6  },
  4882.       [ILO_GPE_GEN6_3DSTATE_GS]                               = { 0,  7  },
  4883.       [ILO_GPE_GEN6_3DSTATE_CLIP]                             = { 0,  4  },
  4884.       [ILO_GPE_GEN6_3DSTATE_SF]                               = { 0,  20 },
  4885.       [ILO_GPE_GEN6_3DSTATE_WM]                               = { 0,  9  },
  4886.       [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS]                      = { 0,  5  },
  4887.       [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS]                      = { 0,  5  },
  4888.       [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS]                      = { 0,  5  },
  4889.       [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK]                      = { 0,  2  },
  4890.       [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE]                = { 0,  4  },
  4891.       [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER]                     = { 0,  7  },
  4892.       [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET]              = { 0,  2  },
  4893.       [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN]             = { 0,  33 },
  4894.       [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE]                     = { 0,  3  },
  4895.       [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS]               = { 0,  3  },
  4896.       [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX]                     = { 0,  4  },
  4897.       [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE]                      = { 0,  3  },
  4898.       [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER]                   = { 0,  3  },
  4899.       [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER]                = { 0,  3  },
  4900.       [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS]                     = { 0,  2  },
  4901.       [ILO_GPE_GEN6_PIPE_CONTROL]                             = { 0,  5  },
  4902.       [ILO_GPE_GEN6_3DPRIMITIVE]                              = { 0,  6  },
  4903.    };
  4904.    const int header = gen6_command_size_table[cmd].header;
  4905.    const int body = gen6_command_size_table[cmd].body;
  4906.    const int count = arg;
  4907.  
  4908.    ILO_GPE_VALID_GEN(dev, 6, 6);
  4909.    assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
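   /*
    * For example, 3DSTATE_VERTEX_BUFFERS with three vertex buffers costs a
    * one-DWord header plus four DWords per buffer, or 13 DWords in total.
    */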
  4910.  
  4911.    return (likely(count)) ? header + body * count : 0;
  4912. }
  4913.  
  4914. static int
  4915. gen6_estimate_state_size(const struct ilo_dev_info *dev,
  4916.                          enum ilo_gpe_gen6_state state,
  4917.                          int arg)
  4918. {
  4919.    static const struct {
  4920.       int alignment;
  4921.       int body;
  4922.       bool is_array;
  4923.    } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
  4924.       [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA]          = { 8,  8,  true },
  4925.       [ILO_GPE_GEN6_SF_VIEWPORT]                        = { 8,  8,  true },
  4926.       [ILO_GPE_GEN6_CLIP_VIEWPORT]                      = { 8,  4,  true },
  4927.       [ILO_GPE_GEN6_CC_VIEWPORT]                        = { 8,  2,  true },
  4928.       [ILO_GPE_GEN6_COLOR_CALC_STATE]                   = { 16, 6,  false },
  4929.       [ILO_GPE_GEN6_BLEND_STATE]                        = { 16, 2,  true },
  4930.       [ILO_GPE_GEN6_DEPTH_STENCIL_STATE]                = { 16, 3,  false },
  4931.       [ILO_GPE_GEN6_SCISSOR_RECT]                       = { 8,  2,  true },
  4932.       [ILO_GPE_GEN6_BINDING_TABLE_STATE]                = { 8,  1,  true },
  4933.       [ILO_GPE_GEN6_SURFACE_STATE]                      = { 8,  6,  false },
  4934.       [ILO_GPE_GEN6_SAMPLER_STATE]                      = { 8,  4,  true },
  4935.       [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE]         = { 8,  12, false },
  4936.       [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER]               = { 8,  1,  true },
  4937.    };
  4938.    const int alignment = gen6_state_size_table[state].alignment;
  4939.    const int body = gen6_state_size_table[state].body;
  4940.    const bool is_array = gen6_state_size_table[state].is_array;
  4941.    const int count = arg;
  4942.    int estimate;
  4943.  
  4944.    ILO_GPE_VALID_GEN(dev, 6, 6);
  4945.    assert(state < ILO_GPE_GEN6_STATE_COUNT);
  4946.  
  4947.    if (likely(count)) {
  4948.       if (is_array) {
  4949.          estimate = (alignment - 1) + body * count;
  4950.       }
  4951.       else {
  4952.          estimate = (alignment - 1) + body;
  4953.          /* all states are aligned */
  4954.          if (count > 1)
  4955.             estimate += util_align_npot(body, alignment) * (count - 1);
  4956.       }
  4957.    }
  4958.    else {
  4959.       estimate = 0;
  4960.    }
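
   /*
    * For example, SAMPLER_STATE for 16 samplers is an array state: up to
    * seven DWords of padding for 32-byte alignment plus four DWords per
    * sampler, or 71 DWords in total.
    */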
  4961.  
  4962.    return estimate;
  4963. }
  4964.  
  4965. static const struct ilo_gpe_gen6 gen6_gpe = {
  4966.    .estimate_command_size = gen6_estimate_command_size,
  4967.    .estimate_state_size = gen6_estimate_state_size,
  4968.  
  4969. #define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
  4970.    GEN6_SET(STATE_BASE_ADDRESS),
  4971.    GEN6_SET(STATE_SIP),
  4972.    GEN6_SET(3DSTATE_VF_STATISTICS),
  4973.    GEN6_SET(PIPELINE_SELECT),
  4974.    GEN6_SET(MEDIA_VFE_STATE),
  4975.    GEN6_SET(MEDIA_CURBE_LOAD),
  4976.    GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
  4977.    GEN6_SET(MEDIA_GATEWAY_STATE),
  4978.    GEN6_SET(MEDIA_STATE_FLUSH),
  4979.    GEN6_SET(MEDIA_OBJECT_WALKER),
  4980.    GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
  4981.    GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
  4982.    GEN6_SET(3DSTATE_URB),
  4983.    GEN6_SET(3DSTATE_VERTEX_BUFFERS),
  4984.    GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
  4985.    GEN6_SET(3DSTATE_INDEX_BUFFER),
  4986.    GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
  4987.    GEN6_SET(3DSTATE_CC_STATE_POINTERS),
  4988.    GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
  4989.    GEN6_SET(3DSTATE_VS),
  4990.    GEN6_SET(3DSTATE_GS),
  4991.    GEN6_SET(3DSTATE_CLIP),
  4992.    GEN6_SET(3DSTATE_SF),
  4993.    GEN6_SET(3DSTATE_WM),
  4994.    GEN6_SET(3DSTATE_CONSTANT_VS),
  4995.    GEN6_SET(3DSTATE_CONSTANT_GS),
  4996.    GEN6_SET(3DSTATE_CONSTANT_PS),
  4997.    GEN6_SET(3DSTATE_SAMPLE_MASK),
  4998.    GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
  4999.    GEN6_SET(3DSTATE_DEPTH_BUFFER),
  5000.    GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
  5001.    GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
  5002.    GEN6_SET(3DSTATE_LINE_STIPPLE),
  5003.    GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
  5004.    GEN6_SET(3DSTATE_GS_SVB_INDEX),
  5005.    GEN6_SET(3DSTATE_MULTISAMPLE),
  5006.    GEN6_SET(3DSTATE_STENCIL_BUFFER),
  5007.    GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
  5008.    GEN6_SET(3DSTATE_CLEAR_PARAMS),
  5009.    GEN6_SET(PIPE_CONTROL),
  5010.    GEN6_SET(3DPRIMITIVE),
  5011.    GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
  5012.    GEN6_SET(SF_VIEWPORT),
  5013.    GEN6_SET(CLIP_VIEWPORT),
  5014.    GEN6_SET(CC_VIEWPORT),
  5015.    GEN6_SET(COLOR_CALC_STATE),
  5016.    GEN6_SET(BLEND_STATE),
  5017.    GEN6_SET(DEPTH_STENCIL_STATE),
  5018.    GEN6_SET(SCISSOR_RECT),
  5019.    GEN6_SET(BINDING_TABLE_STATE),
  5020.    GEN6_SET(SURFACE_STATE),
  5021.    GEN6_SET(so_SURFACE_STATE),
  5022.    GEN6_SET(SAMPLER_STATE),
  5023.    GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
  5024.    GEN6_SET(push_constant_buffer),
  5025. #undef GEN6_SET
  5026. };
  5027.  
  5028. const struct ilo_gpe_gen6 *
  5029. ilo_gpe_gen6_get(void)
  5030. {
  5031.    return &gen6_gpe;
  5032. }
  5033.